diff --git a/README.md b/README.md index eae69b1..d430c45 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ $ action-rules --min_stable_attributes 2 --min_flexible_attributes 1 --min_undes * [Scalene Profiling](https://github.com/lukassykora/action-rules/blob/main/notebooks/profiling/plot.ipynb) * [GPU Memory Usage - Sparse vs. Dense Matrix](https://github.com/lukassykora/action-rules/blob/main/notebooks/gpu_sparse_vs_dense/process_logs.ipynb) * [CPU Usage](https://github.com/lukassykora/action-rules/blob/main/notebooks/cpu_cores/cpu_usage.ipynb) +* [Compare the Action-Rules package with ActionRulesDiscovery package](https://github.com/lukassykora/action-rules/blob/main/notebooks/Comparison.ipynb) - link to ActionRulesDiscovery package: https://github.com/lukassykora/actionrules ## Credits diff --git a/notebooks/Comparison.ipynb b/notebooks/Comparison.ipynb new file mode 100644 index 0000000..3ddd29f --- /dev/null +++ b/notebooks/Comparison.ipynb @@ -0,0 +1,669 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c2956994-b9ab-46a0-a1c4-a606cf6a3085", + "metadata": {}, + "source": [ + "# Compare the Action-Rules package with ActionRulesDiscovery package" + ] + }, + { + "cell_type": "markdown", + "id": "94602578-2145-4928-9547-3f96d0b1c060", + "metadata": {}, + "source": [ + "## Setting" + ] + }, + { + "cell_type": "markdown", + "id": "628baa40-9c72-46c5-80bd-d3f1dacd9256", + "metadata": {}, + "source": [ + "### Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0b2e1a5e-65ad-4acc-a677-6a8678cf3079", + "metadata": {}, + "outputs": [], + "source": [ + "stable_attributes = [\"gender\", \"SeniorCitizen\", \"Partner\"]\n", + "flexible_attributes = [\"PhoneService\", \n", + " \"InternetService\", \n", + " \"OnlineSecurity\", \n", + " \"DeviceProtection\", \n", + " \"TechSupport\",\n", + " \"StreamingTV\"]\n", + "target = 'Churn'\n", + "min_stable_attributes = 2\n", + "min_flexible_attributes = 1 #min 1\n", + "min_undesired_support = 20\n", + "min_undesired_confidence = 
0.6\n", + "min_desired_support = 20\n", + "min_desired_confidence = 0.6\n", + "undesired_state = 'Yes'\n", + "desired_state = 'No'" + ] + }, + { + "cell_type": "markdown", + "id": "cbf04d02-ca32-4f89-a724-8b99f068f275", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e82e6386-78dc-4dd7-81e9-534c5c2c9c95", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "id": "f9611d4d-946c-48be-89b8-b8f809bf2e84", + "metadata": {}, + "source": [ + "Load the Telco data (`data/telco.csv`) into a pandas DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "624ee6c0-4f34-4939-a754-9029507e50fe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerIDgenderSeniorCitizenPartnerDependentstenurePhoneServiceMultipleLinesInternetServiceOnlineSecurityOnlineBackupDeviceProtectionTechSupportStreamingTVStreamingMoviesContractPaperlessBillingPaymentMethodMonthlyChargesTotalChargesChurn
07590-VHVEGFemale0YesNo1NoNo phone serviceDSLNoYesNoNoNoNoMonth-to-monthYesElectronic check29.8529.85No
15575-GNVDEMale0NoNo34YesNoDSLYesNoYesNoNoNoOne yearNoMailed check56.951889.5No
23668-QPYBKMale0NoNo2YesNoDSLYesYesNoNoNoNoMonth-to-monthYesMailed check53.85108.15Yes
37795-CFOCWMale0NoNo45NoNo phone serviceDSLYesNoYesYesNoNoOne yearNoBank transfer (automatic)42.301840.75No
49237-HQITUFemale0NoNo2YesNoFiber opticNoNoNoNoNoNoMonth-to-monthYesElectronic check70.70151.65Yes
..................................................................
70386840-RESVBMale0YesYes24YesYesDSLYesNoYesYesYesYesOne yearYesMailed check84.801990.5No
70392234-XADUHFemale0YesYes72YesYesFiber opticNoYesYesNoYesYesOne yearYesCredit card (automatic)103.207362.9No
70404801-JZAZLFemale0YesYes11NoNo phone serviceDSLYesNoNoNoNoNoMonth-to-monthYesElectronic check29.60346.45No
70418361-LTMKDMale1YesNo4YesYesFiber opticNoNoNoNoNoNoMonth-to-monthYesMailed check74.40306.6Yes
70423186-AJIEKMale0NoNo66YesNoFiber opticYesNoYesYesYesYesTwo yearYesBank transfer (automatic)105.656844.5No
\n", + "

7043 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " customerID gender SeniorCitizen Partner Dependents tenure \\\n", + "0 7590-VHVEG Female 0 Yes No 1 \n", + "1 5575-GNVDE Male 0 No No 34 \n", + "2 3668-QPYBK Male 0 No No 2 \n", + "3 7795-CFOCW Male 0 No No 45 \n", + "4 9237-HQITU Female 0 No No 2 \n", + "... ... ... ... ... ... ... \n", + "7038 6840-RESVB Male 0 Yes Yes 24 \n", + "7039 2234-XADUH Female 0 Yes Yes 72 \n", + "7040 4801-JZAZL Female 0 Yes Yes 11 \n", + "7041 8361-LTMKD Male 1 Yes No 4 \n", + "7042 3186-AJIEK Male 0 No No 66 \n", + "\n", + " PhoneService MultipleLines InternetService OnlineSecurity \\\n", + "0 No No phone service DSL No \n", + "1 Yes No DSL Yes \n", + "2 Yes No DSL Yes \n", + "3 No No phone service DSL Yes \n", + "4 Yes No Fiber optic No \n", + "... ... ... ... ... \n", + "7038 Yes Yes DSL Yes \n", + "7039 Yes Yes Fiber optic No \n", + "7040 No No phone service DSL Yes \n", + "7041 Yes Yes Fiber optic No \n", + "7042 Yes No Fiber optic Yes \n", + "\n", + " OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies \\\n", + "0 Yes No No No No \n", + "1 No Yes No No No \n", + "2 Yes No No No No \n", + "3 No Yes Yes No No \n", + "4 No No No No No \n", + "... ... ... ... ... ... \n", + "7038 No Yes Yes Yes Yes \n", + "7039 Yes Yes No Yes Yes \n", + "7040 No No No No No \n", + "7041 No No No No No \n", + "7042 No Yes Yes Yes Yes \n", + "\n", + " Contract PaperlessBilling PaymentMethod \\\n", + "0 Month-to-month Yes Electronic check \n", + "1 One year No Mailed check \n", + "2 Month-to-month Yes Mailed check \n", + "3 One year No Bank transfer (automatic) \n", + "4 Month-to-month Yes Electronic check \n", + "... ... ... ... 
\n", + "7038 One year Yes Mailed check \n", + "7039 One year Yes Credit card (automatic) \n", + "7040 Month-to-month Yes Electronic check \n", + "7041 Month-to-month Yes Mailed check \n", + "7042 Two year Yes Bank transfer (automatic) \n", + "\n", + " MonthlyCharges TotalCharges Churn \n", + "0 29.85 29.85 No \n", + "1 56.95 1889.5 No \n", + "2 53.85 108.15 Yes \n", + "3 42.30 1840.75 No \n", + "4 70.70 151.65 Yes \n", + "... ... ... ... \n", + "7038 84.80 1990.5 No \n", + "7039 103.20 7362.9 No \n", + "7040 29.60 346.45 No \n", + "7041 74.40 306.6 Yes \n", + "7042 105.65 6844.5 No \n", + "\n", + "[7043 rows x 21 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.set_option('display.max_columns', None)\n", + "data_frame = pd.read_csv(\"data/telco.csv\", sep=\";\")\n", + "data_frame" + ] + }, + { + "cell_type": "markdown", + "id": "b940ab76-cd8e-4dfc-ac99-7945b8d2fde8", + "metadata": {}, + "source": [ + "## Action-Rules package" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f8eada6d-7e99-4559-ad2d-a8fac7d6f112", + "metadata": {}, + "outputs": [], + "source": [ + "from action_rules import ActionRules\n", + "# Action Rules Mining\n", + "action_rules = ActionRules(\n", + " min_stable_attributes = min_stable_attributes, \n", + " min_flexible_attributes = min_flexible_attributes, \n", + " min_undesired_support = min_undesired_support, \n", + " min_undesired_confidence = min_undesired_confidence, \n", + " min_desired_support = min_desired_support,\n", + " min_desired_confidence = min_desired_confidence, \n", + " verbose = False)" + ] + }, + { + "cell_type": "markdown", + "id": "5b6c3107-2520-4f3e-87ee-58e7d4839aa3", + "metadata": {}, + "source": [ + "Action rules discovery." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a338278f-52b9-4686-8061-1f58af955df4", + "metadata": {}, + "outputs": [], + "source": [ + "def measure_time():\n", + " action_rules.fit(\n", + " data = data_frame, \n", + " stable_attributes = stable_attributes, \n", + " flexible_attributes = flexible_attributes, \n", + " target = target, \n", + " target_undesired_state = undesired_state,\n", + " target_desired_state = desired_state, \n", + " use_gpu = False,\n", + " use_sparse_matrix = False,\n", + " )\n", + " print('Number of action rules: ' + str(len(action_rules.get_rules().get_ar_notation())))" + ] + }, + { + "cell_type": "markdown", + "id": "9944a727-6963-4e38-8d9b-8fed4bc3e8d3", + "metadata": {}, + "source": [ + "Measure the execution time (a single run) for the parameters defined above." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f567ea5d-7a82-43c6-91fd-03e9e05a1d53", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of action rules: 1113\n", + "1.51 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n", + "Time: 1.51 s ± 0 ns per loop (mean ± std. dev. 
of 1 run, 1 loop each)\n" + ] + } + ], + "source": [ + "t = %timeit -n1 -r1 -o measure_time()\n", + "print('Time: ' + str(t))" + ] + }, + { + "cell_type": "markdown", + "id": "a57f1a64-7b3a-4b1d-8db3-a6b0c47cd7a6", + "metadata": {}, + "source": [ + "## ActionRulesDiscovery package" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5cb7cd3b-6976-4024-a8ae-6e5538377617", + "metadata": {}, + "outputs": [], + "source": [ + "from actionrules.actionRulesDiscovery import ActionRulesDiscovery\n", + "\n", + "actionRulesDiscovery = ActionRulesDiscovery()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "742f211e-70c9-4a2f-b5e9-bf9c349f21c7", + "metadata": {}, + "outputs": [], + "source": [ + "def measure_time_2():\n", + " actionRulesDiscovery.load_pandas(data_frame)\n", + " actionRulesDiscovery.fit(stable_attributes = stable_attributes,\n", + " flexible_attributes = flexible_attributes,\n", + " consequent = target,\n", + " conf=min_undesired_confidence * 100,\n", + " supp=-min_desired_support,\n", + " desired_classes = [desired_state],\n", + " is_nan=False,\n", + " is_reduction=True,\n", + " is_strict_flexible=False,\n", + " min_stable_attributes=min_stable_attributes,\n", + " min_flexible_attributes=min_flexible_attributes,\n", + " max_stable_attributes=100,\n", + " max_flexible_attributes=100)\n", + " print('Number of action rules: ' + str(len(actionRulesDiscovery.get_action_rules())))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "23fda5ba-7d11-4c3a-99e6-3fb13483e515", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jupyter-xsykl04@vse.cz/.local/lib/python3.10/site-packages/actionrules/decisions/decisions.py:81: FutureWarning: DataFrame.applymap has been deprecated. 
Use DataFrame.map instead.\n", + " self.data = self.data.applymap(str)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of action rules: 1113\n", + "3.21 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n", + "Time: 3.21 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n" + ] + } + ], + "source": [ + "t = %timeit -n1 -r1 -o measure_time_2()\n", + "print('Time: ' + str(t))" + ] + }, + { + "cell_type": "markdown", + "id": "8781823a-cbeb-4352-bd0f-fd93a26f9a4a", + "metadata": {}, + "source": [ + "# Result\n", + "\n", + "Both packages found the same number of action rules (1113). The Action-Rules package was faster in this single timed run (1.51 s vs. 3.21 s)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}