Add multiprocessor support

Now each DataFrame and each plot per loop is created using a multiprocessing Pool
fullonic · Feb 25, 2020 · 264e3bf · 264e3bf
1 parent cbdde33
commit 264e3bf
Show file tree

Hide file tree

Showing 6 changed files with 121 additions and 49 deletions.
diff --git a/app.py b/app.py
@@ -9,7 +9,7 @@
 from datetime import datetime, timedelta
 from pathlib import Path
 from threading import Event, Thread
-from multiprocessing import Process
+from multiprocessing import Pool
 
 from engineio.async_drivers import gevent  # noqa
 from flask import (
@@ -29,7 +29,7 @@
 from flask_socketio import SocketIO
 
 from core.converter import ControlFile, ExperimentCycle
-from core.resume import ResumeControl, ResumeDataFrame
+from core.resume import ResumeControl, ResumeDataFrame, TearDown
 from core.error_handler import checker
 from core.parse_help_page import parser
 from core.utils import (
@@ -83,37 +83,32 @@
     )
 )
 
-
 # SocketIO
 socketio = SocketIO(app, async_mode="gevent")
 
 
 ####################
 # MULTI PROCESSOR TASK
 ####################
-def multi_global_plots(flush, wait, close, files, preview_folder, keep, folder_dst, now):
-    """Start a new processor to generate and move all global plots."""
-    global_plots(flush, wait, close, files, preview_folder, keep, folder_dst)
-    for f in Path(preview_folder).glob("*.html"):
-        print(f"moving folder {f}")
-        shutil.move(str(f), folder_dst)
-
-
 def save_loop_file(experiment):  # TODO: SAVE INDIVIDUAL LOOPS FASTER WIP
     for k, loop in enumerate(experiment.df_loop_generator):
         if experiment.file_type == "Experiment":
-            print(str(k))
-            # experiment.save(loop, name=str(k))
+            experiment.save(loop, name=str(k + 1))
         else:
-            print(f"control_{str(k)}")
-            # experiment.save(loop, name=f"control_{str(k)}")
+            experiment.save(loop, name=f"{experiment.original_file.fname}_{str(k + 1)}")
+
+
+def save_loop_graph(experiment):  # TODO: CREATE INDIVIDUAL LOOPS FASTER WIP
+    """Generate individual graphs for loop of each uploaded file."""
+    print(f"CREATING PLOT: {experiment.original_file.fname}")
+    experiment.create_plot()
 
 
 def compare_files(experiment_files, preview_experiment_files, project_folder, times={}):
     k_map = {"C1.txt": "C1.html", "C2.txt": "C2.html"}
     for uploaded, preview in zip(experiment_files, preview_experiment_files):
         if not filecmp.cmp(preview, uploaded):
-            # User uploaded a new file, global plot needs to be generated
+            # User uploaded a new file after preview global plot
             experiment = ExperimentCycle(
                 **times, original_file=uploaded, file_type="Global grafic"
             )
@@ -134,36 +129,51 @@ def process_excel_files(
 ):
     """Start a new thread to process excel file uploaded by the user."""
     # Loop throw all uploaded files and clean the data set
+    config = config_from_file()
     project_folder = Path(uploaded_excel_files[0]).parent
     if plot:
         experiment_files = sorted([f for f in Path(project_folder).glob("*.txt")])
         preview_experiment_files = sorted(
             [f for f in Path(app.config["FILES_PREVIEW_FOLDER"]).glob("*.txt")]
         )
+        times = {"flush": flush, "wait": wait, "close": close}
+        yes = True if preview_experiment_files else False
+        print(f"{yes=}")
         if preview_experiment_files:
+            print("HERE, preview_experiment_files")
             compare_files(
                 experiment_files,
                 preview_experiment_files,
                 project_folder,
-                times={"flush": flush, "wait": wait, "close": close}
+                times=times,
             )
-
-    save_converted = False  # NOTE: REMOVE AND GET INFO FROM CONFIG FILE
+        else:
+            print("global_plots")
+            global_plots(
+                flush,
+                wait,
+                close,
+                experiment_files,
+                preview_folder=f"{app.config['UPLOAD_FOLDER']}/preview",
+                keep=True,
+            )
+            for f in Path(app.config["GRAPHICS_PREVIEW_FOLDER"]).glob("*.html"):
+                print(f"moving folder {f}")
+                shutil.move(str(f), project_folder)
 
     # CALCULATE BLANKS
     control_file_1 = os.path.join(os.path.dirname(uploaded_excel_files[0]), "C1.txt")
     control_file_2 = os.path.join(os.path.dirname(uploaded_excel_files[0]), "C2.txt")
     ignore_loops = cache.get("ignored_loops")
 
+    processed_files = []
     for idx, c in enumerate([control_file_1, control_file_2]):
         C = ControlFile(
             flush, wait, close, c, file_type=f"control_{idx + 1}", ignore_loops=ignore_loops,
         )
         C_Total = ResumeControl(C)
         C_Total.get_bank()
-        if plot:
-            C.create_plot()
-
+        processed_files.append(C)
     control = C_Total.calculate_blank()
     print(f"Valor 'Blanco' {control}")
 
@@ -172,18 +182,23 @@ def process_excel_files(
         experiment = ExperimentCycle(
             flush, wait, close, data_file, ignore_loops=ignore_loops, file_type="Experiment",
         )
-        if save_converted:
-            experiment.original_file.save()
-
-        # save_loop_file(experiment)
         resume = ResumeDataFrame(experiment)
         resume.generate_resume(control)
         resume.save()
-        if plot:
-            experiment.create_plot()
-        resume.zip_folder()
-        print("Tasca conclosa")
+        processed_files.append(experiment)
+
+    p = Pool()
+    if plot:
+        p.map(save_loop_graph, processed_files)
+    if config["experiment_file_config"]["SAVE_LOOP_DF"]:
+        p.map(save_loop_file, processed_files)
+    if config["experiment_file_config"]["SAVE_CONVERTED"]:
+        for f in processed_files:
+            f.original_file.save(name=f"[Original]{f.original_file.fname}")
+
+    TearDown(Path(experiment.original_file.folder_dst)).organize()
     cache.set("generating_files", False)
+    print("Tasca conclosa")
     print(f"Processament de temps total {round(time.perf_counter() - now, 3)} segons")
 
 
@@ -203,7 +218,6 @@ def excel_files():
     session["excel_config"] = config_from_file()["file_cycle_config"]
     ignore_loops = cache.get("ignored_loops")
     if request.method == "POST":
-        print(f"{request.form=}")
         cache.set("generating_files", True)
         # IGNORED LOOPS
         C1_ignore = {"C1": [_ for _ in request.form.get("c1_ignore_loops").split(",")]}
@@ -442,5 +456,5 @@ def ignored_loops():
     print("*" * 70)
     print("Avís: tancant aquesta finestra es tancarà l’aplicació")
     print("*" * 70)
-    # socketio.run(app, debug=False, host="0.0.0.0", port=port)
-    socketio.run(app, debug=True, host="0.0.0.0", port=port)
+    socketio.run(app, debug=False, host="0.0.0.0", port=port)
+    # socketio.run(app, debug=True, host="0.0.0.0", port=port)
diff --git a/config.json b/config.json
@@ -1 +1 @@
-{"experiment_file_config": {"PLOT_TITLE": "Evoluci\u00f3 de l'oxigen disde el aire", "X_COL": "Temps (min)", "Y_COL": "mg 02/L", "DT_COL": "Date &Time [DD-MM-YYYY HH:MM:SS]", "TSCODE": "Time stamp code", "O2_COL": "SDWA0003000061      , CH 1 O2 [mg/L]", "SAVE_LOOP_DF": false, "SAVE_CONVERTED": false}, "file_cycle_config": {"flush": 3, "wait": 10, "close": 40, "aqua_volume": 0.2}, "pump_control_config": {}}
+{"experiment_file_config": {"PLOT_TITLE": "Evoluci\u00f3 de l'oxigen disde el aire", "X_COL": "Temps (min)", "Y_COL": "mg 02/L", "DT_COL": "Date &Time [DD-MM-YYYY HH:MM:SS]", "TSCODE": "Time stamp code", "O2_COL": "SDWA0003000061      , CH 1 O2 [mg/L]", "SAVE_LOOP_DF": true, "SAVE_CONVERTED": true}, "file_cycle_config": {"flush": 3, "wait": 10, "close": 40, "aqua_volume": 0.2}, "pump_control_config": {}}
diff --git a/core/converter.py b/core/converter.py
@@ -2,7 +2,7 @@
 import os
 import math
 import datetime
-
+from pathlib import Path
 
 import chardet
 import pandas as pd
@@ -46,7 +46,6 @@ def __init__(self, file_: str, file_type):  # noqa
             self.fname = "Experiment"
         self.file_output = f"{self.folder_dst}/{self.fname}"
         config = config_from_file()["experiment_file_config"]
-        self.save_converted = False if ("Preview" in file_type) else config["SAVE_CONVERTED"]
         self.dt_col_name = config["DT_COL"]
 
     @property
@@ -84,13 +83,12 @@ def to_dataframe(self, output="xlsx"):
         df.loc[:, self.dt_col_name] = df.loc[:, self.dt_col_name].map(convert_datetime)
         self.output = output
         self.df = df
-        if self.save_converted:
-            self.save(output)
 
     def save(self, name=None):
         """Export converted DF to a new file."""
         # TODO: Allow user pass a new name for the exported file
-        self.converted_file = f"{self.file_output}.xlsx"
+        name = Path(self.file_output).parent / name if name else self.file_output
+        self.converted_file = f"{name}.xlsx"
         self.df.to_excel(self.converted_file, index=False)
 
 
@@ -246,11 +244,6 @@ def df_loop_generator(self):
                 break
             start = end + 1
             end += 1
-            # if self.save_loop_df:
-            #     if self.file_type == "data":
-            #         self.save(df_close, name=str(k))
-            #     else:
-            #         self.save(df_close, name=f"control_{str(k)}")
 
             yield df_close
 

diff --git a/core/resume.py b/core/resume.py
@@ -156,7 +156,8 @@ def zip_folder(self):
         """Zip the most recent folder created with excel files."""
         # Full path of the project folder name
 
-        TearDown(Path(self.experiment.original_file.file_output).parent).zip_folder()
+        pass
+        # TearDown(Path(self.experiment.original_file.file_output).parent).zip_folder()
 
 
 class ResumeControl(ResumeDataFrame):
@@ -184,7 +185,7 @@ def __init__(self, project_folder):
         self.graph_preview = Path(ROOT) / "templates/previews"
         self.txt_preview = Path(ROOT) / "static/uploads/preview"
 
-    def zip_folder(self):
+    def organize(self):
         """Zip the most recent folder created with excel files."""
         # Full path of the project folder name
         location = self.project_folder

diff --git a/tests/UI_test.py b/tests/UI_test.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+import pytest
+import requests
+from selenium import webdriver
+from selenium.common.exceptions import ElementNotInteractableException
+from core.utils import config_from_file, save_config_to_file
+
+test_data = Path(".").resolve() /"tests/Data"
+data_file = test_data / "angula.txt"
+control_file_1 = test_data / "control1.txt"
+control_file_2 = test_data / "control2.txt"
+
+
+@pytest.fixture()
+def driver():
+    options = webdriver.FirefoxOptions()
+    # options.add_argument("-Headless")
+    # options.add_argument("-Headless")
+    driver = webdriver.Firefox(options=options)
+    return driver
+
+
+# @pytest.mark.skip
+def test_home_page(driver):
+    driver.get("http://localhost:5000/")
+
+
+def process_data_with_graphics(preview=False):
+    driver = webdriver.Firefox()
+    driver.get("http://localhost:5000/")
+    control_file_1 = driver.find_element_by_id("control_file_1")
+    control_file_2 = driver.find_element_by_id("control_file_2")
+    data_file = driver.find_element_by_id("data_file")
+    control_file_1.send_keys(str(test_data / "control1.txt"))
+    control_file_2.send_keys(str(test_data / "control2.txt"))
+    data_file.send_keys(str(test_data / "angula.txt"))
+    submit = driver.find_element_by_id("submit_files")
+    if not preview:
+        submit.click()
+    else:
+        preview_plots(driver)
+
+def preview_plots(driver):
+    vista_previa = driver.find_element_by_id("generatePlotComplet")
+    vista_previa.click()
+    submit = driver.find_element_by_id("submit_files")
+    submit.click()
+    # Preview plots
+    driver.implicitly_wait(10)
+    driver.find_element_by_id("C1").send_keys("1,2")
+    driver.find_element_by_id("C1ID").click()
+    try:
+        driver.find_element_by_id("myBtn").click()
+    except ElementNotInteractableException:
+        pass
+    driver.find_element_by_xpath("/html/body/nav/a[2]").click()
+    driver.implicitly_wait(0.5)
+    driver.find_element_by_id("generatePlots").click()
+    driver.find_element_by_id("submit_files").click()
+
+def process_preview_and_plot():
+    return process_data_with_graphics(True)
+
+process_preview_and_plot()
diff --git a/tests/test_routes.py b/tests/test_routes.py
@@ -43,10 +43,10 @@ def test_preview_global_plots():
     assert response.status_code == 200
     for f in templates_folder.glob("*.html"):
         assert f.name in ["C1.html", "Experiment.html", "C2.html"]
-        # f.unlink()
+        f.unlink()
 
 
-@pytest.mark.skip
+# @pytest.mark.skip
 def test_file_upload_and_graphic():
     """Global plots.
     WHEN: User upload and wants tables plus graphics
@@ -70,7 +70,7 @@ def test_file_upload_and_graphic():
         # f.unlink()
 
 
-# @pytest.mark.skip
+@pytest.mark.skip
 def test_zip_file_exist():
     """Test zipped file exist.
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"experiment_file_config": {"PLOT_TITLE": "Evoluci\u00f3 de l'oxigen disde el aire", "X_COL": "Temps (min)", "Y_COL": "mg 02/L", "DT_COL": "Date &Time [DD-MM-YYYY HH:MM:SS]", "TSCODE": "Time stamp code", "O2_COL": "SDWA0003000061 , CH 1 O2 [mg/L]", "SAVE_LOOP_DF": false, "SAVE_CONVERTED": false}, "file_cycle_config": {"flush": 3, "wait": 10, "close": 40, "aqua_volume": 0.2}, "pump_control_config": {}}
		{"experiment_file_config": {"PLOT_TITLE": "Evoluci\u00f3 de l'oxigen disde el aire", "X_COL": "Temps (min)", "Y_COL": "mg 02/L", "DT_COL": "Date &Time [DD-MM-YYYY HH:MM:SS]", "TSCODE": "Time stamp code", "O2_COL": "SDWA0003000061 , CH 1 O2 [mg/L]", "SAVE_LOOP_DF": true, "SAVE_CONVERTED": true}, "file_cycle_config": {"flush": 3, "wait": 10, "close": 40, "aqua_volume": 0.2}, "pump_control_config": {}}