diff --git a/examples/credit-risk-end-to-end/01_Credit_Risk_Data_Prep.ipynb b/examples/credit-risk-end-to-end/01_Credit_Risk_Data_Prep.ipynb new file mode 100644 index 00000000000..a345ec8ca46 --- /dev/null +++ b/examples/credit-risk-end-to-end/01_Credit_Risk_Data_Prep.ipynb @@ -0,0 +1,757 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a52c80c4-1ea2-4d1e-b582-fac51081e76d", + "metadata": {}, + "source": [ + "
\n", + " | 0 | \n", + "1 | \n", + "2 | \n", + "
---|---|---|---|
ID | \n", + "0 | \n", + "1 | \n", + "2 | \n", + "
checking_status | \n", + "<0 | \n", + "0<=X<200 | \n", + "no checking | \n", + "
duration | \n", + "6 | \n", + "48 | \n", + "12 | \n", + "
credit_history | \n", + "critical/other existing credit | \n", + "existing paid | \n", + "critical/other existing credit | \n", + "
purpose | \n", + "radio/tv | \n", + "radio/tv | \n", + "education | \n", + "
credit_amount | \n", + "1169 | \n", + "5951 | \n", + "2096 | \n", + "
savings_status | \n", + "no known savings | \n", + "<100 | \n", + "<100 | \n", + "
employment | \n", + ">=7 | \n", + "1<=X<4 | \n", + "4<=X<7 | \n", + "
installment_commitment | \n", + "4 | \n", + "2 | \n", + "2 | \n", + "
personal_status | \n", + "male single | \n", + "female div/dep/mar | \n", + "male single | \n", + "
other_parties | \n", + "none | \n", + "none | \n", + "none | \n", + "
residence_since | \n", + "4 | \n", + "2 | \n", + "3 | \n", + "
property_magnitude | \n", + "real estate | \n", + "real estate | \n", + "real estate | \n", + "
age | \n", + "67 | \n", + "22 | \n", + "49 | \n", + "
other_payment_plans | \n", + "none | \n", + "none | \n", + "none | \n", + "
housing | \n", + "own | \n", + "own | \n", + "own | \n", + "
existing_credits | \n", + "2 | \n", + "1 | \n", + "1 | \n", + "
job | \n", + "skilled | \n", + "skilled | \n", + "unskilled resident | \n", + "
num_dependents | \n", + "1 | \n", + "1 | \n", + "2 | \n", + "
own_telephone | \n", + "yes | \n", + "none | \n", + "none | \n", + "
foreign_worker | \n", + "yes | \n", + "yes | \n", + "yes | \n", + "
class | \n", + "good | \n", + "bad | \n", + "good | \n", + "
application_timestamp | \n", + "2023-10-04 17:50:13 | \n", + "2023-09-28 18:10:13 | \n", + "2023-10-03 23:06:03 | \n", + "
\n", + " | ID | \n", + "class | \n", + "outcome_timestamp | \n", + "
---|---|---|---|
0 | \n", + "0 | \n", + "good | \n", + "2023-11-24 22:50:13 | \n", + "
1 | \n", + "1 | \n", + "bad | \n", + "2023-11-03 12:10:13 | \n", + "
2 | \n", + "2 | \n", + "good | \n", + "2023-11-30 22:06:03 | \n", + "
\n", + " | ID | \n", + "class | \n", + "outcome_timestamp | \n", + "
---|---|---|---|
0 | \n", + "0 | \n", + "good | \n", + "2023-11-24 22:50:13 | \n", + "
1 | \n", + "1 | \n", + "bad | \n", + "2023-11-03 12:10:13 | \n", + "
2 | \n", + "2 | \n", + "good | \n", + "2023-11-30 22:06:03 | \n", + "
3 | \n", + "3 | \n", + "good | \n", + "2023-11-17 07:37:19 | \n", + "
4 | \n", + "4 | \n", + "bad | \n", + "2023-12-01 05:01:48 | \n", + "
\n", + " | ID | \n", + "class | \n", + "outcome_timestamp | \n", + "duration | \n", + "credit_amount | \n", + "installment_commitment | \n", + "checking_status | \n", + "residence_since | \n", + "age | \n", + "existing_credits | \n", + "num_dependents | \n", + "housing | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|
18 | \n", + "473 | \n", + "good | \n", + "2023-12-16 03:29:12+00:00 | \n", + "6 | \n", + "1238 | \n", + "4 | \n", + "no checking | \n", + "4 | \n", + "36 | \n", + "1 | \n", + "2 | \n", + "own | \n", + "
764 | \n", + "894 | \n", + "good | \n", + "2023-11-15 23:19:35+00:00 | \n", + "18 | \n", + "1169 | \n", + "4 | \n", + "no checking | \n", + "3 | \n", + "29 | \n", + "2 | \n", + "1 | \n", + "own | \n", + "
504 | \n", + "318 | \n", + "good | \n", + "2023-11-23 13:03:53+00:00 | \n", + "12 | \n", + "701 | \n", + "4 | \n", + "no checking | \n", + "2 | \n", + "32 | \n", + "2 | \n", + "1 | \n", + "own | \n", + "
454 | \n", + "340 | \n", + "good | \n", + "2023-12-26 17:59:37+00:00 | \n", + "24 | \n", + "5743 | \n", + "2 | \n", + "0<=X<200 | \n", + "4 | \n", + "24 | \n", + "2 | \n", + "1 | \n", + "for free | \n", + "
453 | \n", + "605 | \n", + "good | \n", + "2023-12-18 11:27:02+00:00 | \n", + "24 | \n", + "2828 | \n", + "4 | \n", + "<0 | \n", + "4 | \n", + "22 | \n", + "1 | \n", + "1 | \n", + "own | \n", + "
\n", + " | checking_status | \n", + "housing | \n", + "duration | \n", + "credit_amount | \n", + "installment_commitment | \n", + "residence_since | \n", + "age | \n", + "existing_credits | \n", + "num_dependents | \n", + "
---|---|---|---|---|---|---|---|---|---|
18 | \n", + "3.0 | \n", + "1.0 | \n", + "6.0 | \n", + "1238.0 | \n", + "4.0 | \n", + "4.0 | \n", + "36.0 | \n", + "1.0 | \n", + "2.0 | \n", + "
764 | \n", + "3.0 | \n", + "1.0 | \n", + "18.0 | \n", + "1169.0 | \n", + "4.0 | \n", + "3.0 | \n", + "29.0 | \n", + "2.0 | \n", + "1.0 | \n", + "
504 | \n", + "3.0 | \n", + "1.0 | \n", + "12.0 | \n", + "701.0 | \n", + "4.0 | \n", + "2.0 | \n", + "32.0 | \n", + "2.0 | \n", + "1.0 | \n", + "
454 | \n", + "0.0 | \n", + "0.0 | \n", + "24.0 | \n", + "5743.0 | \n", + "2.0 | \n", + "4.0 | \n", + "24.0 | \n", + "2.0 | \n", + "1.0 | \n", + "
453 | \n", + "1.0 | \n", + "1.0 | \n", + "24.0 | \n", + "2828.0 | \n", + "4.0 | \n", + "4.0 | \n", + "22.0 | \n", + "1.0 | \n", + "1.0 | \n", + "
Pipeline(steps=[('transform',\n", + " ColumnTransformer(transformers=[('cat_features',\n", + " OrdinalEncoder(),\n", + " ['checking_status',\n", + " 'housing']),\n", + " ('num_features', 'passthrough',\n", + " ['duration', 'credit_amount',\n", + " 'installment_commitment',\n", + " 'residence_since', 'age',\n", + " 'existing_credits',\n", + " 'num_dependents'])])),\n", + " ('rf_model',\n", + " RandomForestClassifier(class_weight={0: 5, 1: 1},\n", + " criterion='entropy', max_depth=4,\n", + " min_samples_leaf=10, n_estimators=400,\n", + " random_state=142))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('transform',\n", + " ColumnTransformer(transformers=[('cat_features',\n", + " OrdinalEncoder(),\n", + " ['checking_status',\n", + " 'housing']),\n", + " ('num_features', 'passthrough',\n", + " ['duration', 'credit_amount',\n", + " 'installment_commitment',\n", + " 'residence_since', 'age',\n", + " 'existing_credits',\n", + " 'num_dependents'])])),\n", + " ('rf_model',\n", + " RandomForestClassifier(class_weight={0: 5, 1: 1},\n", + " criterion='entropy', max_depth=4,\n", + " min_samples_leaf=10, n_estimators=400,\n", + " random_state=142))])
ColumnTransformer(transformers=[('cat_features', OrdinalEncoder(),\n", + " ['checking_status', 'housing']),\n", + " ('num_features', 'passthrough',\n", + " ['duration', 'credit_amount',\n", + " 'installment_commitment', 'residence_since',\n", + " 'age', 'existing_credits',\n", + " 'num_dependents'])])
['checking_status', 'housing']
OrdinalEncoder()
['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
passthrough
RandomForestClassifier(class_weight={0: 5, 1: 1}, criterion='entropy',\n", + " max_depth=4, min_samples_leaf=10, n_estimators=400,\n", + " random_state=142)
\n", + " | ID | \n", + "checking_status | \n", + "duration | \n", + "installment_commitment | \n", + "credit_amount | \n", + "residence_since | \n", + "num_dependents | \n", + "age | \n", + "housing | \n", + "existing_credits | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "18 | \n", + "0<=X<200 | \n", + "24.0 | \n", + "4.0 | \n", + "12579.0 | \n", + "2.0 | \n", + "1.0 | \n", + "44.0 | \n", + "for free | \n", + "1.0 | \n", + "
1 | \n", + "764 | \n", + "no checking | \n", + "24.0 | \n", + "4.0 | \n", + "2463.0 | \n", + "3.0 | \n", + "1.0 | \n", + "27.0 | \n", + "own | \n", + "2.0 | \n", + "
2 | \n", + "504 | \n", + "<0 | \n", + "24.0 | \n", + "4.0 | \n", + "1207.0 | \n", + "4.0 | \n", + "1.0 | \n", + "24.0 | \n", + "rent | \n", + "1.0 | \n", + "
\n", + " | ID | \n", + "credit_amount | \n", + "installment_commitment | \n", + "checking_status | \n", + "duration | \n", + "num_dependents | \n", + "housing | \n", + "age | \n", + "residence_since | \n", + "existing_credits | \n", + "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "18 | \n", + "12579.0 | \n", + "4.0 | \n", + "0<=X<200 | \n", + "24.0 | \n", + "1.0 | \n", + "for free | \n", + "44.0 | \n", + "2.0 | \n", + "1.0 | \n", + "
1 | \n", + "764 | \n", + "2463.0 | \n", + "4.0 | \n", + "no checking | \n", + "24.0 | \n", + "1.0 | \n", + "own | \n", + "27.0 | \n", + "3.0 | \n", + "2.0 | \n", + "
2 | \n", + "504 | \n", + "1207.0 | \n", + "4.0 | \n", + "<0 | \n", + "24.0 | \n", + "1.0 | \n", + "rent | \n", + "24.0 | \n", + "4.0 | \n", + "1.0 | \n", + "
\n", + " | ID | \n", + "Prediction | \n", + "Loan_Designation | \n", + "True_Value | \n", + "
---|---|---|---|---|
18 | \n", + "18 | \n", + "0.0 | \n", + "bad | \n", + "bad | \n", + "
764 | \n", + "764 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "
504 | \n", + "504 | \n", + "0.0 | \n", + "bad | \n", + "bad | \n", + "
454 | \n", + "454 | \n", + "0.0 | \n", + "bad | \n", + "bad | \n", + "
453 | \n", + "453 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "
0 | \n", + "0 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "
1 | \n", + "1 | \n", + "0.0 | \n", + "bad | \n", + "bad | \n", + "
2 | \n", + "2 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "
3 | \n", + "3 | \n", + "0.0 | \n", + "bad | \n", + "good | \n", + "
4 | \n", + "4 | \n", + "0.0 | \n", + "bad | \n", + "bad | \n", + "
5 | \n", + "5 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "
6 | \n", + "6 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "
7 | \n", + "7 | \n", + "0.0 | \n", + "bad | \n", + "good | \n", + "
8 | \n", + "8 | \n", + "1.0 | \n", + "good | \n", + "good | \n", + "