Skip to content
This repository has been archived by the owner on Oct 31, 2019. It is now read-only.

implementing mlp on wine dataset #136

Merged
merged 1 commit into from
Oct 10, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions machine_learning/wine_quality/mlp/wine_mlp.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.neural_network import MLPClassifier\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Read the white-wine quality CSV from the UCI archive (fields are ';'-separated)\n",
"dataset_url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv\"\n",
"dataset = pd.read_csv(filepath_or_buffer=dataset_url, delimiter=';')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4898, 12)\n"
]
}
],
"source": [
"# Dimensions of dataset\n",
"print(dataset.shape)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n1833 7.4 0.49 0.24 15.1 0.030 \n3374 5.9 0.19 0.21 1.7 0.045 \n2270 7.3 0.19 0.25 1.4 0.051 \n2961 6.9 0.23 0.41 8.0 0.030 \n4451 6.3 0.30 0.19 7.7 0.049 \n\n free sulfur dioxide total sulfur dioxide density pH sulphates \\\n1833 34.0 153.0 0.99530 3.13 0.51 \n3374 57.0 135.0 0.99341 3.32 0.44 \n2270 41.0 107.0 0.99382 3.53 0.66 \n2961 30.0 114.0 0.99368 3.22 0.54 \n4451 47.0 184.0 0.99514 3.22 0.48 \n\n alcohol quality \n1833 12.0 7 \n3374 9.5 5 \n2270 10.5 7 \n2961 11.0 6 \n4451 9.5 5 \n"
]
}
],
"source": [
"# 5 random rows of the dataset\n",
"print(dataset.sample(5))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" fixed acidity volatile acidity citric acid residual sugar \\\ncount 4898.000000 4898.000000 4898.000000 4898.000000 \nmean 6.854788 0.278241 0.334192 6.391415 \nstd 0.843868 0.100795 0.121020 5.072058 \nmin 3.800000 0.080000 0.000000 0.600000 \n25% 6.300000 0.210000 0.270000 1.700000 \n50% 6.800000 0.260000 0.320000 5.200000 \n75% 7.300000 0.320000 0.390000 9.900000 \nmax 14.200000 1.100000 1.660000 65.800000 \n\n chlorides free sulfur dioxide total sulfur dioxide density \\\ncount 4898.000000 4898.000000 4898.000000 4898.000000 \nmean 0.045772 35.308085 138.360657 0.994027 \nstd 0.021848 17.007137 42.498065 0.002991 \nmin 0.009000 2.000000 9.000000 0.987110 \n25% 0.036000 23.000000 108.000000 0.991723 \n50% 0.043000 34.000000 134.000000 0.993740 \n75% 0.050000 46.000000 167.000000 0.996100 \nmax 0.346000 289.000000 440.000000 1.038980 \n\n pH sulphates alcohol quality \ncount 4898.000000 4898.000000 4898.000000 4898.000000 \nmean 3.188267 0.489847 10.514267 5.877909 \nstd 0.151001 0.114126 1.230621 0.885639 \nmin 2.720000 0.220000 8.000000 3.000000 \n25% 3.090000 0.410000 9.500000 5.000000 \n50% 3.180000 0.470000 10.400000 6.000000 \n75% 3.280000 0.550000 11.400000 6.000000 \nmax 3.820000 1.080000 14.200000 9.000000 \n"
]
}
],
"source": [
"# Description of dataset\n",
"print(dataset.describe())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Target is the 'quality' column; every remaining column is a feature\n",
"y = dataset['quality']\n",
"X = dataset.drop('quality', axis='columns')\n",
"\n",
"# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.33, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Train the model on training data (X_train, y_train).\n",
"#    MLPs are sensitive to feature scale, so the inputs are standardised first.\n",
"#    Wrapping the scaler in a pipeline fits it on the training rows only and\n",
"#    reuses those statistics at predict time. random_state pins the weight\n",
"#    initialisation so the reported accuracy is repeatable.\n",
"model = make_pipeline(\n",
"    StandardScaler(),\n",
"    MLPClassifier(hidden_layer_sizes=(10, 5), activation='logistic', random_state=42)\n",
")\n",
"model.fit(X_train, y_train)\n",
"\n",
"# 2. Predict target values using test data (X_test)\n",
"y_predict = model.predict(X_test)\n",
"\n",
"# 3. Fraction of test rows whose predicted quality matches the true label\n",
"print(accuracy_score(y_test, y_predict))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1. Train a deeper model on training data (X_train, y_train), allowing up to 500\n",
"#    iterations. MLPs are sensitive to feature scale, so the inputs are standardised\n",
"#    first. Wrapping the scaler in a pipeline fits it on the training rows only and\n",
"#    reuses those statistics at predict time. random_state pins the weight\n",
"#    initialisation so the reported accuracy is repeatable.\n",
"model = make_pipeline(\n",
"    StandardScaler(),\n",
"    MLPClassifier(\n",
"        hidden_layer_sizes=(25, 15, 10),\n",
"        activation='logistic',\n",
"        max_iter=500,\n",
"        random_state=42\n",
"    )\n",
")\n",
"model.fit(X_train, y_train)\n",
"\n",
"# 2. Predict target values using test data (X_test)\n",
"y_predict = model.predict(X_test)\n",
"\n",
"# 3. Fraction of test rows whose predicted quality matches the true label\n",
"print(accuracy_score(y_test, y_predict))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2.0
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}