본문 바로가기
기본소양/CODE

2. Tree based model CODE [3] Model Selection

by EXUPERY 2021. 2. 9.
반응형

Model Selection

Tree based model CODE 

 

 


 

1. LogisticRegressionCV

# Logistic-regression baseline with built-in cross-validation over the
# regularization strength C (LogisticRegressionCV defaults).
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import f1_score  # fix: f1_score is called below but was never imported
from sklearn.preprocessing import StandardScaler  # NOTE(review): imported but unused here — either scale the features before fitting or drop this import

lr = LogisticRegressionCV()
lr.fit(X_train_simp, y_train_simp)

# Accuracy and F1 on the train split vs. the validation split.
print('훈련 정확도 : ', lr.score(X_train_simp, y_train_simp))
print('검증 정확도 : ', lr.score(X_val_simp, y_val_simp))
print('훈련 f1 score : ', f1_score(y_train_simp, lr.predict(X_train_simp)))
print('검증 f1 score : ', f1_score(y_val_simp, lr.predict(X_val_simp)))

 

2. Decision Tree Classifier

# Single decision tree, regularized via depth / leaf-size / feature caps;
# a fixed random_state keeps the fit reproducible.
from sklearn.tree import DecisionTreeClassifier

dtr = DecisionTreeClassifier(
    max_depth=7,
    min_samples_leaf=5,
    max_features=0.8,
    random_state=1,
)
dtr.fit(X_train_simp, y_train_simp)

# Same report as the other models: accuracy on train then validation,
# followed by F1 on train then validation.
_splits = (
    (X_train_simp, y_train_simp, '훈련'),
    (X_val_simp, y_val_simp, '검증'),
)
for _X, _y, _tag in _splits:
    print(_tag + ' 정확도 : ', dtr.score(_X, _y))
for _X, _y, _tag in _splits:
    print(_tag + ' f1 score : ', f1_score(_y, dtr.predict(_X)))

 

3. Random Forest Classifier

# Final random-forest model for the comparison.
from sklearn.ensemble import RandomForestClassifier  # fix: RandomForestClassifier was used with no import in view

Final_Model_4 = RandomForestClassifier(max_depth=18,
                                       criterion='entropy',
                                       min_samples_leaf=4,
                                       n_jobs=-1,          # use all available CPU cores
                                       random_state=1000,  # reproducible bootstrap/feature sampling
                                       oob_score=True)     # out-of-bag estimate; read it via Final_Model_4.oob_score_ (currently never reported)

Final_Model_4.fit(X_train_simp, y_train_simp)

# Accuracy and F1 on the train split vs. the validation split.
print('훈련 정확도 : ', Final_Model_4.score(X_train_simp, y_train_simp))
print('검증 정확도 : ', Final_Model_4.score(X_val_simp, y_val_simp))
print('훈련 f1 score : ', f1_score(y_train_simp, Final_Model_4.predict(X_train_simp)))
print('검증 f1 score : ', f1_score(y_val_simp, Final_Model_4.predict(X_val_simp)))

 

4. ROC Curve

# Compare the three fitted models on the validation set via ROC curves and AUC.
# NOTE(review): assumes matplotlib.pyplot was imported as `plt` earlier in the
# notebook — confirm, since no import is visible here.
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# Predicted probability of the positive class for each model; the
# no-skill baseline always predicts 0 (AUC 0.5 by construction).
ns_probs = [0] * len(y_val_simp)
lr_probs = lr.predict_proba(X_val_simp)[:, 1]
dtr_probs = dtr.predict_proba(X_val_simp)[:, 1]
rf_probs = Final_Model_4.predict_proba(X_val_simp)[:, 1]

# Area under the ROC curve.
ns_auc = roc_auc_score(y_val_simp, ns_probs)
lr_auc = roc_auc_score(y_val_simp, lr_probs)
dtr_auc = roc_auc_score(y_val_simp, dtr_probs)
rf_auc = roc_auc_score(y_val_simp, rf_probs)

# FPR/TPR points for plotting; thresholds are not needed, so discard them
# uniformly (the original kept an unused `thresholds` on the last call only).
ns_fpr, ns_tpr, _ = roc_curve(y_val_simp, ns_probs)
lr_fpr, lr_tpr, _ = roc_curve(y_val_simp, lr_probs)
dtr_fpr, dtr_tpr, _ = roc_curve(y_val_simp, dtr_probs)
rf_fpr, rf_tpr, _ = roc_curve(y_val_simp, rf_probs)

# Report AUCs and draw all four curves on one axis.
print('Baseline: ROC AUC=%.2f' % (ns_auc))
print('Logistic: ROC AUC=%.2f' % (lr_auc))
print('Decision Tree: ROC AUC=%.2f' % (dtr_auc))
print('Random Forest: ROC AUC=%.2f' % (rf_auc))
plt.figure(figsize=(8, 8))
plt.plot(ns_fpr, ns_tpr, linestyle='--', label='Baseline')
plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic')
plt.plot(dtr_fpr, dtr_tpr, marker='.', label='Decision Tree')
plt.plot(rf_fpr, rf_tpr, marker='.', label='Random Forest')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

 

반응형

댓글