[QPROF] Correcting metrics. (#1260)

* [QPROF] Correcting metrics.
  - Correcting multiple rounding errors.
  - Correcting aggregation errors.
  - Supporting external tables.
* Update qprof.py
* Update qprof.py
* syntax
* Added units for Execution time in the QPROF interface
* Update tree.py
* Update tree.py
* Update qprof.py

---------

Co-authored-by: Umar Farooq Ghumman <[email protected]>
vertica · Aug 14, 2024 · 8ad47ee · 8ad47ee
1 parent ab0ab18
commit 8ad47ee
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 31 deletions.
diff --git a/verticapy/performance/vertica/qprof.py b/verticapy/performance/vertica/qprof.py
@@ -2625,7 +2625,10 @@ def _get_metric_val(self):
         """
         vdf = self.get_qexecution_report()
         cols = vdf.get_columns()[3:]
-        columns = [f"SUM({col}) AS {col}" for col in cols]
+        columns = [
+            f"AVG({col}) AS {col}" if "_us" in col else f"SUM({col}) AS {col}"
+            for col in cols
+        ]
         query = f"""
             SELECT
                 operator_name,
@@ -2933,8 +2936,8 @@ def get_qplan_tree(
             - cstall_us
             - exec_time_us (default)
             - est_rows
-            - mem_all_mb
-            - mem_res_mb
+            - mem_all_b
+            - mem_res_b
             - proc_rows
             - prod_rows
             - pstall_us
@@ -3752,8 +3755,8 @@ def get_qexecution_report(self) -> vDataFrame:
                 node_name,
                 operator_name,
                 path_id,
-                ROUND(SUM(CASE TRIM(counter_name) WHEN 'execution time (us)' THEN
-                    counter_value ELSE NULL END) / 1000, 3.0) AS exec_time_us,
+                SUM(CASE TRIM(counter_name) WHEN 'execution time (us)' THEN
+                    counter_value ELSE NULL END) AS exec_time_us,
                 SUM(CASE TRIM(counter_name) WHEN 'estimated rows produced' THEN
                     counter_value ELSE NULL END) AS est_rows,
                 SUM(CASE TRIM(counter_name) WHEN 'rows processed' THEN
@@ -3768,18 +3771,18 @@ def get_qexecution_report(self) -> vDataFrame:
                     counter_value ELSE NULL END) AS pstall_us,
                 SUM(CASE TRIM(counter_name) WHEN 'clock time (us)' THEN
                     counter_value ELSE NULL END) AS clock_time_us,
-                ROUND(SUM(CASE TRIM(counter_name) WHEN 'memory reserved (bytes)' THEN
-                    counter_value ELSE NULL END) / 1000000, 1.0) AS mem_res_mb,
-                ROUND(SUM(CASE TRIM(counter_name) WHEN 'memory allocated (bytes)' THEN 
-                    counter_value ELSE NULL END) / 1000000, 1.0) AS mem_all_mb,
+                SUM(CASE TRIM(counter_name) WHEN 'memory reserved (bytes)' THEN
+                    counter_value ELSE NULL END) AS mem_res_b,
+                SUM(CASE TRIM(counter_name) WHEN 'memory allocated (bytes)' THEN 
+                    counter_value ELSE NULL END) AS mem_all_b,
                 SUM(CASE TRIM(counter_name) WHEN 'bytes spilled' THEN
                     counter_value ELSE NULL END) AS bytes_spilled
             FROM
                 v_monitor.execution_engine_profiles
             WHERE
                 transaction_id={self.transaction_id} AND
                 statement_id={self.statement_id} AND
-                counter_value / 1000000 > 0
+                counter_value >= 0
             GROUP BY
                 1, 2, 3
             ORDER BY
@@ -3842,8 +3845,8 @@ def get_qexecution(
             - cstall_us
             - exec_time_us (default)
             - est_rows
-            - mem_all_mb
-            - mem_res_mb
+            - mem_all_b
+            - mem_res_b
             - proc_rows
             - prod_rows
             - pstall_us

diff --git a/verticapy/performance/vertica/qprof_interface.py b/verticapy/performance/vertica/qprof_interface.py
@@ -168,7 +168,7 @@ def get_qplan_tree(self, use_javascript=True, **style_kwargs):
         dropdown1 = widgets.Dropdown(
             options=options_dropwdown,
             description="Metric # 1:",
-            value="Execution time in \u00b5s",
+            value="AVG Execution time per node in \u00b5s",
             layout={"width": "260px"},
         )
         dropdown2 = widgets.Dropdown(
@@ -471,7 +471,7 @@ def update_query_display(self):
         self.query_display.children[0].value = current_query
         self.query_display_info.value = f"""
         <b>Query Execution Success:</b> {self.success_html if self.query_success else self.failure_html} <br>
-        <b>Execution Time:</b> {self.get_qduration()} <br>
+        <b>Execution Time:</b> {self.get_qduration()} (seconds)<br>
         <b>Target Schema:</b> {self.target_schema["v_internal"] if self.target_schema else ''} <br>
         <b>Transaction ID:</b> {self.transaction_id} <br>
         <b>Statement ID:</b> {self.statement_id} <br>

diff --git a/verticapy/performance/vertica/qprof_utility.py b/verticapy/performance/vertica/qprof_utility.py
@@ -157,8 +157,8 @@ def _get_metrics() -> list:
             "cstall_us",
             "exec_time_us",
             "est_rows",
-            "mem_all_mb",
-            "mem_res_mb",
+            "mem_all_b",
+            "mem_res_b",
             "proc_rows",
             "prod_rows",
             "pstall_us",
@@ -170,16 +170,16 @@ def _get_metrics() -> list:
     def _get_metrics_name(metric: str, inv: bool = False) -> str:
         look_up_table = {
             "bytes_spilled": "Number of bytes spilled",
-            "clock_time_us": "Clock time in \u00b5s",
+            "clock_time_us": "AVG Clock time per node in \u00b5s",
             "cost": "Query plan cost",
-            "cstall_us": "Network consumer stall time in \u00b5s",
-            "exec_time_us": "Execution time in \u00b5s",
+            "cstall_us": "AVG Network consumer stall time per node in \u00b5s",
+            "exec_time_us": "AVG Execution time per node in \u00b5s",
             "est_rows": "Estimated row count",
-            "mem_res_mb": "Reserved memory size in MB",
-            "mem_all_mb": "Allocated memory size in MB",
+            "mem_res_b": "Reserved memory size in B",
+            "mem_all_b": "Allocated memory size in B",
             "proc_rows": "Processed row count",
             "prod_rows": "Produced row count",
-            "pstall_us": "Network producer stall time in \u00b5s",
+            "pstall_us": "AVG Network producer stall time per node in \u00b5s",
             "rle_prod_rows": "Produced RLE row count",
             "rows": "Row count",
         }

diff --git a/verticapy/performance/vertica/tree.py b/verticapy/performance/vertica/tree.py
@@ -67,8 +67,8 @@ class PerformanceTree:
         - cstall_us
         - exec_time_us (default)
         - est_rows
-        - mem_all_mb
-        - mem_res_mb
+        - mem_all_b
+        - mem_res_b
         - proc_rows
         - prod_rows
         - pstall_us
@@ -825,6 +825,8 @@ def _get_operator_icon(self, operator: str) -> Optional[str]:
                     return "C"
                 elif "FILTER" in operator or "Filter" in operator:
                     return "F"
+                elif "LOAD" in operator:
+                    return "L"
             else:
                 if "TEMP RELATION ACCESS" in operator:
                     return "⏳"
@@ -856,6 +858,8 @@ def _get_operator_icon(self, operator: str) -> Optional[str]:
                     return "📋"
                 elif "FILTER" in operator or "Filter" in operator:
                     return "🔍"
+                elif "LOAD" in operator:
+                    "💾"
             return "?"
         return None
 
@@ -1375,10 +1379,10 @@ def _gen_labels(self) -> str:
                 [self._get_metric(self.rows[i], self.metric[j], i) for i in range(n)]
             ]
         if not (isinstance(self.metric[0], NoneType)):
-            all_metrics = [math.log(1 + me[0][i]) for i in range(n)]
+            all_metrics = [math.log(1 + max(me[0][i], 0.0)) for i in range(n)]
             m_min, m_max = min(all_metrics), max(all_metrics)
         if len(self.metric) > 1 and not (isinstance(self.metric[1], NoneType)):
-            all_metrics_2 = [math.log(1 + me[1][i]) for i in range(n)]
+            all_metrics_2 = [math.log(1 + max(me[1][i], 0.0)) for i in range(n)]
             m_min_2, m_max_2 = min(all_metrics_2), max(all_metrics_2)
             if not (self.style["two_legend"]):
                 m_min = min(m_min, m_min_2)
@@ -1743,7 +1747,8 @@ def _gen_legend(self, metric: Optional[list] = None, idx: int = 0) -> str:
         all_metrics = []
         for me in metric:
             all_metrics += [
-                math.log(1 + self._get_metric(self.rows[i], me, i)) for i in range(n)
+                math.log(1 + max(self._get_metric(self.rows[i], me, i), 0.0))
+                for i in range(n)
             ]
         m_min, m_max = min(all_metrics), max(all_metrics)
         if m_min == m_max:

diff --git a/verticapy/tests_new/performance/vertica/test_qprof.py b/verticapy/tests_new/performance/vertica/test_qprof.py
@@ -993,8 +993,8 @@ def test_get_qplan(self, qprof_data, return_report, print_plan):
             "clock_time_us",
             # "cstall_us", # ZeroDivisionError
             # "pstall_us", # ZeroDivisionError
-            "mem_res_mb",
-            # "mem_all_mb",  # ZeroDivisionError
+            "mem_res_b",
+            # "mem_all_b",  # ZeroDivisionError
         ],
     )
     def test_get_qplan_tree(
@@ -1475,8 +1475,8 @@ def test_get_qexecution_report(self):
             # "clock_time_us",
             # "cstall_us",
             # "pstall_us",
-            # "mem_res_mb",
-            # "mem_all_mb",
+            # "mem_res_b",
+            # "mem_all_b",
         ],
     )
     @pytest.mark.parametrize(