This repository has been archived by the owner on Oct 31, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 102
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added the Decision tree classifier (#224)
* Random Forest Classifier implemented * removed the .ipynb_checkpoints in machine-learning/iris * Removed the .ipynb_checkpoints in machine_learning/iris/rfc * Decision Tree Classifier implemented * removed the directory md that was in the repo but not locally
- Loading branch information
1 parent
042120c
commit 1ad1e90
Showing
2 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Decision Tree classifier implemented |
215 changes: 215 additions & 0 deletions
215
machine_learning/iris/dtc/decision_tree_implementation.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"from sklearn.model_selection import train_test_split\n", | ||
"from sklearn.datasets import load_iris\n", | ||
"from sklearn.model_selection import cross_val_score\n", | ||
"from sklearn.tree import DecisionTreeClassifier" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import dataset\n", | ||
"dataset_url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\"\n", | ||
"column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']\n", | ||
"dataset = pd.read_csv(dataset_url, names=column_names)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"(150, 5)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Dimensions of dataset\n", | ||
"print(dataset.shape)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" sepal-length sepal-width petal-length petal-width class\n", | ||
"30 4.8 3.1 1.6 0.2 Iris-setosa\n", | ||
"82 5.8 2.7 3.9 1.2 Iris-versicolor\n", | ||
"108 6.7 2.5 5.8 1.8 Iris-virginica\n", | ||
"32 5.2 4.1 1.5 0.1 Iris-setosa\n", | ||
"90 5.5 2.6 4.4 1.2 Iris-versicolor\n", | ||
"55 5.7 2.8 4.5 1.3 Iris-versicolor\n", | ||
"31 5.4 3.4 1.5 0.4 Iris-setosa\n", | ||
"130 7.4 2.8 6.1 1.9 Iris-virginica\n", | ||
"6 4.6 3.4 1.4 0.3 Iris-setosa\n", | ||
"48 5.3 3.7 1.5 0.2 Iris-setosa\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# 10 random rows of the dataset\n", | ||
"print(dataset.sample(10))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" sepal-length sepal-width petal-length petal-width\n", | ||
"count 150.000000 150.000000 150.000000 150.000000\n", | ||
"mean 5.843333 3.054000 3.758667 1.198667\n", | ||
"std 0.828066 0.433594 1.764420 0.763161\n", | ||
"min 4.300000 2.000000 1.000000 0.100000\n", | ||
"25% 5.100000 2.800000 1.600000 0.300000\n", | ||
"50% 5.800000 3.000000 4.350000 1.300000\n", | ||
"75% 6.400000 3.300000 5.100000 1.800000\n", | ||
"max 7.900000 4.400000 6.900000 2.500000\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Description of dataset\n", | ||
"print(dataset.describe())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 14, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"X = dataset.iloc[:,0:4] \n", | ||
"y = dataset['class']\n", | ||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 28, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# 1. Train the model on training data (X_train, y_train): create the classifier here, fit it in the next cell\n", | ||
"dtc = DecisionTreeClassifier(random_state=0)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 35, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", | ||
" max_features=None, max_leaf_nodes=None,\n", | ||
" min_impurity_decrease=0.0, min_impurity_split=None,\n", | ||
" min_samples_leaf=1, min_samples_split=2,\n", | ||
" min_weight_fraction_leaf=0.0, presort=False, random_state=0,\n", | ||
" splitter='best')" | ||
] | ||
}, | ||
"execution_count": 35, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# 1 (cont.). Fit the model on the training data (X_train, y_train)\n", | ||
"dtc.fit(X_train, y_train)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 37, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[ 0.01256535 0.02915555 0.05981177 0.89846733]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(dtc.feature_importances_)\n", | ||
"# 2. Predict target values using test data (X_test)\n", | ||
"predictions = dtc.predict(X_test)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 34, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Accuracy Score is :0.96\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# 3. Find accuracy of the model comparing with actual test data targets(y_test)\n", | ||
"score = dtc.score(X_test,y_test)\n", | ||
"print(\"Accuracy Score is :\" + str(score))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |