본문 바로가기
기본소양/CODE

3. Applied Predictive Modeling [2] Importance

by EXUPERY 2021. 2. 18.
반응형

Importance

Applied Predictive Modeling  

 

 


 

1. Feature Importance

# Pair every training column with the importance reported by the fitted
# decision-tree step of the pipeline, most important feature first.
tree_step = pipe.named_steps['decisiontreeregressor']
zipp = pd.DataFrame(
    list(zip(X_train.columns, tree_step.feature_importances_)),
    columns=['feature', 'importance'],
)
zipp = zipp.sort_values('importance', ascending=False)

# Horizontal bar chart: one bar per feature, in the sorted order above.
plt.figure(figsize=(15, 15))
sns.barplot(x=zipp['importance'], y=zipp['feature'], palette='Blues_r')
plt.title("Tree Decision");

 

2. Permutation Importance

!pip install eli5
import eli5
from eli5.sklearn import PermutationImportance
#
# Wrap the pipeline's decision-tree step in eli5's PermutationImportance
# (5 iterations, fixed seed so the result is reproducible).
tree_step = pipe.named_steps['decisiontreeregressor']
permuter = PermutationImportance(tree_step, n_iter=5, random_state=2)

# The wrapped estimator is only the tree step, so the test features are
# first passed through the pipeline's ordinal-encoder step by hand.
encoder_step = pipe.named_steps['ordinalencoder']
X_test_transformed = encoder_step.transform(X_test)
permuter.fit(X_test_transformed, y_test);

# Importances labelled with the original column names, in ascending order.
feature_names = X_test.columns.tolist()
pd.Series(data=permuter.feature_importances_, index=feature_names).sort_values()

# eli5 weights table; top=None asks for every feature rather than a top-k cut.
eli5.show_weights(permuter, top=None, feature_names=feature_names)

 

3. PDPBOX

!pip install pdpbox
from pdpbox.pdp import pdp_isolate, pdp_plot
# Feature whose partial dependence is examined; reused for the plot labels.
pdp_feature = '전용면적'

# Partial dependence of the model's prediction on that single feature,
# evaluated on the encoded test set with a 10-point percentile grid.
isolated = pdp_isolate(
    model=model,
    dataset=X_test_encoded,
    model_features=X_test_encoded.columns,
    feature=pdp_feature,
    grid_type='percentile',
    num_grid_points=10,
)

# PDP: the averaged curve only.
pdp_plot(isolated, feature_name=pdp_feature);

# PDP with ICE curves: also draw individual lines (only a 0.001 fraction of
# them, to keep the figure readable) and the data-point distribution.
pdp_plot(
    isolated,
    feature_name=pdp_feature,
    plot_lines=True,
    frac_to_plot=0.001,
    plot_pts_dist=True,
)
         
# Interact
from pdpbox.pdp import pdp_interact, pdp_interact_plot
# Two-feature partial dependence: how the pair below jointly moves the
# prediction. Only the first 500 encoded test rows are passed in.
features = ['전용면적', 'IXIC']
interaction_rows = X_test_encoded[:500]
interaction = pdp_interact(
    model=model,
    dataset=interaction_rows,
    model_features=X_test_encoded.columns,
    features=features,
)

# Grid (heatmap-style) rendering of the interaction.
pdp_interact_plot(
    interaction,
    plot_type='grid',
    feature_names=features,
    figsize=(15, 15),
)
                  
# Interact in plotly
import plotly.graph_objs as go
# Reshape the long-format interaction result into a 2-D table of predictions:
# one column per value of features[0], one row per value of features[1].
# The trailing [::-1] reverses the row order.
pdp = interaction.pdp.pivot_table(
    values='preds',
    index=features[1],
    columns=features[0],
)[::-1]

# 3-D surface: x follows the table's columns, y its rows, z the predictions.
surface = go.Surface(x=pdp.columns, y=pdp.index, z=pdp.values)

# Axis titles: the two feature names, plus `target` (defined elsewhere in the
# notebook) for the prediction axis.
layout = go.Layout(
    scene={
        'xaxis': {'title': features[0]},
        'yaxis': {'title': features[1]},
        'zaxis': {'title': target},
    }
)

fig = go.Figure(data=surface, layout=layout)
fig.show()

 

4. SHAP

!pip install shap
import shap

# Summary plot
# Rows to explain. The SHAP values and the feature matrix handed to a plot
# must come from the SAME rows: the original computed the values on a training
# slice but plotted them against a test slice, so the displayed feature values
# did not belong to the explained rows (and the call fails outright when the
# two slices differ in length). One shared test-set sample is used throughout.
shap_sample = X_test_encoded.iloc[10000:10500]

# Summary plot
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(shap_sample)
shap.summary_plot(shap_values, shap_sample)

# Heatmap
# The training slice is passed to the explainer as its second argument
# (background data); the rows being explained are the shared test sample.
explainer2 = shap.Explainer(model, X_train_encoded.iloc[10000:10500])
shap_values2 = explainer2(shap_sample)
shap.plots.heatmap(shap_values2[0:10])  # first 10 explained rows only

# Bar
shap.plots.bar(shap_values2)     # Global: over all explained rows
shap.plots.bar(shap_values2[0])  # Local: the first explained row
반응형

댓글