Skip to content

Commit

Permalink
[QPROF] Correcting metrics. (#1260)
Browse files Browse the repository at this point in the history
* [QPROF] Correcting metrics.

 - correcting multiple rounding errors.
 - correcting aggregation errors.
 - supporting external tables.

* Update qprof.py

* Update qprof.py

* syntax

* added units for Execution time in the QPROF interface

* Update tree.py

* Update tree.py

* Update qprof.py

---------

Co-authored-by: Umar Farooq Ghumman <[email protected]>
  • Loading branch information
oualib and mail4umar authored Aug 14, 2024
1 parent ab0ab18 commit 8ad47ee
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 31 deletions.
27 changes: 15 additions & 12 deletions verticapy/performance/vertica/qprof.py
Original file line number Diff line number Diff line change
Expand Up @@ -2625,7 +2625,10 @@ def _get_metric_val(self):
"""
vdf = self.get_qexecution_report()
cols = vdf.get_columns()[3:]
columns = [f"SUM({col}) AS {col}" for col in cols]
columns = [
f"AVG({col}) AS {col}" if "_us" in col else f"SUM({col}) AS {col}"
for col in cols
]
query = f"""
SELECT
operator_name,
Expand Down Expand Up @@ -2933,8 +2936,8 @@ def get_qplan_tree(
- cstall_us
- exec_time_us (default)
- est_rows
- mem_all_mb
- mem_res_mb
- mem_all_b
- mem_res_b
- proc_rows
- prod_rows
- pstall_us
Expand Down Expand Up @@ -3752,8 +3755,8 @@ def get_qexecution_report(self) -> vDataFrame:
node_name,
operator_name,
path_id,
ROUND(SUM(CASE TRIM(counter_name) WHEN 'execution time (us)' THEN
counter_value ELSE NULL END) / 1000, 3.0) AS exec_time_us,
SUM(CASE TRIM(counter_name) WHEN 'execution time (us)' THEN
counter_value ELSE NULL END) AS exec_time_us,
SUM(CASE TRIM(counter_name) WHEN 'estimated rows produced' THEN
counter_value ELSE NULL END) AS est_rows,
SUM(CASE TRIM(counter_name) WHEN 'rows processed' THEN
Expand All @@ -3768,18 +3771,18 @@ def get_qexecution_report(self) -> vDataFrame:
counter_value ELSE NULL END) AS pstall_us,
SUM(CASE TRIM(counter_name) WHEN 'clock time (us)' THEN
counter_value ELSE NULL END) AS clock_time_us,
ROUND(SUM(CASE TRIM(counter_name) WHEN 'memory reserved (bytes)' THEN
counter_value ELSE NULL END) / 1000000, 1.0) AS mem_res_mb,
ROUND(SUM(CASE TRIM(counter_name) WHEN 'memory allocated (bytes)' THEN
counter_value ELSE NULL END) / 1000000, 1.0) AS mem_all_mb,
SUM(CASE TRIM(counter_name) WHEN 'memory reserved (bytes)' THEN
counter_value ELSE NULL END) AS mem_res_b,
SUM(CASE TRIM(counter_name) WHEN 'memory allocated (bytes)' THEN
counter_value ELSE NULL END) AS mem_all_b,
SUM(CASE TRIM(counter_name) WHEN 'bytes spilled' THEN
counter_value ELSE NULL END) AS bytes_spilled
FROM
v_monitor.execution_engine_profiles
WHERE
transaction_id={self.transaction_id} AND
statement_id={self.statement_id} AND
counter_value / 1000000 > 0
counter_value >= 0
GROUP BY
1, 2, 3
ORDER BY
Expand Down Expand Up @@ -3842,8 +3845,8 @@ def get_qexecution(
- cstall_us
- exec_time_us (default)
- est_rows
- mem_all_mb
- mem_res_mb
- mem_all_b
- mem_res_b
- proc_rows
- prod_rows
- pstall_us
Expand Down
4 changes: 2 additions & 2 deletions verticapy/performance/vertica/qprof_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def get_qplan_tree(self, use_javascript=True, **style_kwargs):
dropdown1 = widgets.Dropdown(
options=options_dropwdown,
description="Metric # 1:",
value="Execution time in \u00b5s",
value="AVG Execution time per node in \u00b5s",
layout={"width": "260px"},
)
dropdown2 = widgets.Dropdown(
Expand Down Expand Up @@ -471,7 +471,7 @@ def update_query_display(self):
self.query_display.children[0].value = current_query
self.query_display_info.value = f"""
<b>Query Execution Success:</b> {self.success_html if self.query_success else self.failure_html} <br>
<b>Execution Time:</b> {self.get_qduration()} <br>
<b>Execution Time:</b> {self.get_qduration()} (seconds)<br>
<b>Target Schema:</b> {self.target_schema["v_internal"] if self.target_schema else ''} <br>
<b>Transaction ID:</b> {self.transaction_id} <br>
<b>Statement ID:</b> {self.statement_id} <br>
Expand Down
16 changes: 8 additions & 8 deletions verticapy/performance/vertica/qprof_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def _get_metrics() -> list:
"cstall_us",
"exec_time_us",
"est_rows",
"mem_all_mb",
"mem_res_mb",
"mem_all_b",
"mem_res_b",
"proc_rows",
"prod_rows",
"pstall_us",
Expand All @@ -170,16 +170,16 @@ def _get_metrics() -> list:
def _get_metrics_name(metric: str, inv: bool = False) -> str:
look_up_table = {
"bytes_spilled": "Number of bytes spilled",
"clock_time_us": "Clock time in \u00b5s",
"clock_time_us": "AVG Clock time per node in \u00b5s",
"cost": "Query plan cost",
"cstall_us": "Network consumer stall time in \u00b5s",
"exec_time_us": "Execution time in \u00b5s",
"cstall_us": "AVG Network consumer stall time per node in \u00b5s",
"exec_time_us": "AVG Execution time per node in \u00b5s",
"est_rows": "Estimated row count",
"mem_res_mb": "Reserved memory size in MB",
"mem_all_mb": "Allocated memory size in MB",
"mem_res_b": "Reserved memory size in B",
"mem_all_b": "Allocated memory size in B",
"proc_rows": "Processed row count",
"prod_rows": "Produced row count",
"pstall_us": "Network producer stall time in \u00b5s",
"pstall_us": "AVG Network producer stall time per node in \u00b5s",
"rle_prod_rows": "Produced RLE row count",
"rows": "Row count",
}
Expand Down
15 changes: 10 additions & 5 deletions verticapy/performance/vertica/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ class PerformanceTree:
- cstall_us
- exec_time_us (default)
- est_rows
- mem_all_mb
- mem_res_mb
- mem_all_b
- mem_res_b
- proc_rows
- prod_rows
- pstall_us
Expand Down Expand Up @@ -825,6 +825,8 @@ def _get_operator_icon(self, operator: str) -> Optional[str]:
return "C"
elif "FILTER" in operator or "Filter" in operator:
return "F"
elif "LOAD" in operator:
return "L"
else:
if "TEMP RELATION ACCESS" in operator:
return "⏳"
Expand Down Expand Up @@ -856,6 +858,8 @@ def _get_operator_icon(self, operator: str) -> Optional[str]:
return "📋"
elif "FILTER" in operator or "Filter" in operator:
return "🔍"
elif "LOAD" in operator:
"💾"
return "?"
return None

Expand Down Expand Up @@ -1375,10 +1379,10 @@ def _gen_labels(self) -> str:
[self._get_metric(self.rows[i], self.metric[j], i) for i in range(n)]
]
if not (isinstance(self.metric[0], NoneType)):
all_metrics = [math.log(1 + me[0][i]) for i in range(n)]
all_metrics = [math.log(1 + max(me[0][i], 0.0)) for i in range(n)]
m_min, m_max = min(all_metrics), max(all_metrics)
if len(self.metric) > 1 and not (isinstance(self.metric[1], NoneType)):
all_metrics_2 = [math.log(1 + me[1][i]) for i in range(n)]
all_metrics_2 = [math.log(1 + max(me[1][i], 0.0)) for i in range(n)]
m_min_2, m_max_2 = min(all_metrics_2), max(all_metrics_2)
if not (self.style["two_legend"]):
m_min = min(m_min, m_min_2)
Expand Down Expand Up @@ -1743,7 +1747,8 @@ def _gen_legend(self, metric: Optional[list] = None, idx: int = 0) -> str:
all_metrics = []
for me in metric:
all_metrics += [
math.log(1 + self._get_metric(self.rows[i], me, i)) for i in range(n)
math.log(1 + max(self._get_metric(self.rows[i], me, i), 0.0))
for i in range(n)
]
m_min, m_max = min(all_metrics), max(all_metrics)
if m_min == m_max:
Expand Down
8 changes: 4 additions & 4 deletions verticapy/tests_new/performance/vertica/test_qprof.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,8 +993,8 @@ def test_get_qplan(self, qprof_data, return_report, print_plan):
"clock_time_us",
# "cstall_us", # ZeroDivisionError
# "pstall_us", # ZeroDivisionError
"mem_res_mb",
# "mem_all_mb", # ZeroDivisionError
"mem_res_b",
# "mem_all_b", # ZeroDivisionError
],
)
def test_get_qplan_tree(
Expand Down Expand Up @@ -1475,8 +1475,8 @@ def test_get_qexecution_report(self):
# "clock_time_us",
# "cstall_us",
# "pstall_us",
# "mem_res_mb",
# "mem_all_mb",
# "mem_res_b",
# "mem_all_b",
],
)
@pytest.mark.parametrize(
Expand Down

0 comments on commit 8ad47ee

Please sign in to comment.