본문 바로가기
기본소양/CODE

2. Tree based model CODE [2] Tree Model

by EXUPERY 2021. 2. 9.
반응형

Tree Model

Tree based model CODE

 

 


 

0. Reference (Baseline)

# Baseline: predict the majority class for every sample.
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

# BUG FIX: the import and the first assignment were fused onto one line
# (SyntaxError), and the validation baseline recomputed the majority class
# from y_val — that leaks validation labels. The majority class must come
# from the TRAINING split only and be reused for validation.
major = y_train.mode()[0]
y_train_pred = [major] * len(y_train)
y_val_pred = [major] * len(y_val)
print("training accuracy: ", accuracy_score(y_train, y_train_pred))
print("validation accuracy: ", accuracy_score(y_val, y_val_pred))
print("training f1_score: ", f1_score(y_train, y_train_pred))
print("validation f1_score: ", f1_score(y_val, y_val_pred))

 

1. Pipeline

from sklearn.pipeline import make_pipeline

# Encode categoricals, impute missing values, then fit a gini decision tree.
pipe_tree = make_pipeline(
    OneHotEncoder(use_cat_names=True),
    SimpleImputer(),
    DecisionTreeClassifier(random_state=2, criterion='gini'),
)
pipe_tree.fit(X_train, y_train)

# Report accuracy (pipeline score) and f1 on both splits.
train_pred = pipe_tree.predict(X_train)
val_pred = pipe_tree.predict(X_val)
print('트리분류 훈련세트 정확도', pipe_tree.score(X_train, y_train))
print('트리분류 검증세트 정확도', pipe_tree.score(X_val, y_val))
print('트리분류 훈련세트 f1 score', f1_score(y_train, train_pred))
print('트리분류 검증세트 f1 score', f1_score(y_val, val_pred))

 

 

2. Visualization

!pip install -q dtreeviz
from dtreeviz.trees import *
encode = OneHotEncoder()
X_train_encode = encode.fit_transform(X_train)
imput = SimpleImputer()
X_train_imput = imput.fit_transform(X_train_encode)
clas = tree.DecisionTreeClassifier(max_depth=3)  
clas.fit(X_train_imput, y_train)

viz = dtreeviz(clas, 
               X_train_imput,
               y_train,
               feature_names=enc.transform(X_val).columns, 
               orientation='LR',
               show_node_labels = True)
viz

 

3. Accessing steps (check importance)

# Method 1: read feature_importances_ off the tree step of a fitted pipeline.
pipe_tree_access = make_pipeline(
  OneHotEncoder(use_cat_names=True),
  SimpleImputer(),
  DecisionTreeClassifier(random_state=2, criterion='gini', max_depth=9))

pipe_tree_access.fit(X_train, y_train)
model_dt = pipe_tree_access.named_steps['decisiontreeclassifier']

# Index the importances by the encoded column names and plot, sorted ascending
# so the most important feature ends up at the top of the horizontal bars.
importances = pd.Series(model_dt.feature_importances_, encoded_columns)
plt.figure(figsize=(5, 15))
importances.sort_values().plot.barh();

# Method 2: pair columns with importances of an already-fitted model.
# list(zip(...)) replaces the original manual append loop (same result).
zipp = list(zip(X_train_simp.columns, Final_Model.feature_importances_))
zipp = pd.DataFrame(zipp, columns=['feature', 'importance']).sort_values('importance', ascending=False)
plt.figure(figsize=(15, 10))
sns.barplot(y=zipp.feature, x=zipp.importance, palette='Blues_r');

 

4. Confusion Matrix

from sklearn.metrics import classification_report
y_pred = Final_Model.predict(X_val_simp)
print(classification_report(y_val_simp, y_pred))

from sklearn import metrics
# .ravel() flattens the 2x2 matrix in row-major order: tn, fp, fn, tp.
tn, fp, fn, tp = metrics.confusion_matrix(y_val_simp, y_pred).ravel()

# BUG FIX: sklearn's confusion_matrix puts ACTUAL classes on the rows and
# PREDICTED classes on the columns — the original labels (and the axis
# titles below) were swapped. Plain lists (not nested) are used so the
# axes become a flat Index rather than an accidental MultiIndex.
cnf_matrix = pd.DataFrame(metrics.confusion_matrix(y_val_simp, y_pred))
cnf_matrix.columns = ['Pred_Neg', 'Pred_Pos']
cnf_matrix.index = ['Actual_Neg', 'Actual_Pos']

plt.figure(figsize=(7, 5))
sns.heatmap(cnf_matrix, annot=True, cmap='Blues', fmt='g')
plt.title("Confusion matrix", y=1.1)
plt.xlabel("Predicted label")
plt.ylabel("Actual label")
plt.show()
print('accuracy score :', accuracy_score(y_val_simp, y_pred))
print('f1 score :', f1_score(y_val_simp, y_pred))

 

반응형

댓글