Skip to content
This repository has been archived by the owner on Oct 31, 2019. It is now read-only.

Commit

Permalink
Added the Decision tree classifier (#224)
Browse files Browse the repository at this point in the history
*  Random Forest Classifier implemented

* removed the .ipynb_checkpoints in machine-learning/iris

*  Removed the .ipynb_checkpoints in machine_learning/iris/rfc

*  Decision Tree Classifier implemented

*  removed the directory md that was in the repo but not locally
  • Loading branch information
bhuvanakundumani authored and ShashankP19 committed Oct 20, 2018
1 parent 042120c commit 1ad1e90
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 0 deletions.
1 change: 1 addition & 0 deletions machine_learning/iris/dtc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Decision Tree classifier implemented
215 changes: 215 additions & 0 deletions machine_learning/iris/dtc/decision_tree_implementation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.tree import DecisionTreeClassifier"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Import dataset\n",
"dataset_url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\"\n",
"column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']\n",
"dataset = pd.read_csv(dataset_url, names=column_names)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(150, 5)\n"
]
}
],
"source": [
"# Dimensions of dataset\n",
"print(dataset.shape)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" sepal-length sepal-width petal-length petal-width class\n",
"30 4.8 3.1 1.6 0.2 Iris-setosa\n",
"82 5.8 2.7 3.9 1.2 Iris-versicolor\n",
"108 6.7 2.5 5.8 1.8 Iris-virginica\n",
"32 5.2 4.1 1.5 0.1 Iris-setosa\n",
"90 5.5 2.6 4.4 1.2 Iris-versicolor\n",
"55 5.7 2.8 4.5 1.3 Iris-versicolor\n",
"31 5.4 3.4 1.5 0.4 Iris-setosa\n",
"130 7.4 2.8 6.1 1.9 Iris-virginica\n",
"6 4.6 3.4 1.4 0.3 Iris-setosa\n",
"48 5.3 3.7 1.5 0.2 Iris-setosa\n"
]
}
],
"source": [
"# 10 random rows of the dataset\n",
"print(dataset.sample(10))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" sepal-length sepal-width petal-length petal-width\n",
"count 150.000000 150.000000 150.000000 150.000000\n",
"mean 5.843333 3.054000 3.758667 1.198667\n",
"std 0.828066 0.433594 1.764420 0.763161\n",
"min 4.300000 2.000000 1.000000 0.100000\n",
"25% 5.100000 2.800000 1.600000 0.300000\n",
"50% 5.800000 3.000000 4.350000 1.300000\n",
"75% 6.400000 3.300000 5.100000 1.800000\n",
"max 7.900000 4.400000 6.900000 2.500000\n"
]
}
],
"source": [
"# Description of dataset\n",
"print(dataset.describe())"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"X = dataset.iloc[:,0:4] \n",
"y = dataset['class']\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 1. Create the decision tree classifier with a fixed random_state\n",
"dtc = DecisionTreeClassifier(random_state=0)\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, presort=False, random_state=0,\n",
" splitter='best')"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 2. Train the model on training data (X_train, y_train)\n",
"dtc.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0.01256535 0.02915555 0.05981177 0.89846733]\n"
]
}
],
"source": [
"print(dtc.feature_importances_)\n",
"# Predict target values using test data (X_test)\n",
"predictions = dtc.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy Score is :0.96\n"
]
}
],
"source": [
"# 3. Compute the model's accuracy on the test data (X_test) against the actual targets (y_test)\n",
"score = dtc.score(X_test,y_test)\n",
"print(\"Accuracy Score is :\" + str(score))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 1ad1e90

Please sign in to comment.