From 5637e83a688217f78ff15664a1d035e2e3984ca3 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:59:09 +0100 Subject: [PATCH] sql updated --- definitions/output/reports/reports_dynamic.js | 10 +++---- includes/reports.js | 27 ++++++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index b134ca2..7dcc90e 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -33,7 +33,7 @@ for ( date, metric, sql, - lense: { key, value }, + lense: { name: key, sql: value }, devRankFilter: constants.devRankFilter }) } @@ -43,7 +43,7 @@ for ( if (startDate === endDate) { iterations.forEach((params, i) => { - publish(params.metric.id + '_' + params.sql.type + '_' + params.lense.key, { + publish(params.metric.id + '_' + params.sql.type + '_' + params.lense.name, { type: 'incremental', protected: true, bigquery: params.sql.type === 'histogram' ? { partitionBy: 'date', clusterBy: ['client'] } : {}, @@ -53,20 +53,20 @@ if (startDate === endDate) { --DELETE FROM ${ctx.self()} --WHERE date = '${params.date}'; `).query(ctx => ` -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.key}"} */` + +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.name}"} */` + params.sql.query(ctx, params) ) }) } else { iterations.forEach((params, i) => { operate( - params.metric.id + '_' + params.sql.type + '_' + params.lense.key + '_' + params.date) + params.metric.id + '_' + params.sql.type + '_' + params.lense.name + '_' + params.date) .tags(['crawl_complete', 'reports']) .queries(ctx => ` DELETE FROM reports.${params.metric.id}_${params.sql.type} WHERE date = '${params.date}'; -/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.key}"} */ +/* {"dataform_trigger": "report_complete", "date": "${params.date}", "name": "${params.metric.id}", "type": "${params.sql.type}", "lense": "${params.lense.name}"} */ INSERT INTO reports.${params.metric.id}_${params.sql.type}` + params.sql.query(ctx, params) ) diff --git a/includes/reports.js b/includes/reports.js index 7590436..7d8d4a1 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -22,13 +22,14 @@ FROM ( COUNT(0) AS volume FROM ${ctx.ref('crawl', 'pages')} WHERE - date = '${params.date}' ${params.devRankFilter} AND + date = '${params.date}' ${params.devRankFilter} ${params.lense.sql} AND is_root_page AND INT64(summary.bytesTotal) > 0 GROUP BY date, client, bin + HAVING bin IS NOT NULL ) ) ORDER BY @@ -40,26 +41,28 @@ ORDER BY { type: 'timeseries', query: DataformTemplateBuilder.create((ctx, params) => ` -SELECT - date, - client, - UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(751)] / 1024, 2) AS p75, - ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(901)] / 1024, 2) AS p90 -FROM ( +WITH pages AS ( SELECT date, client, INT64(summary.bytesTotal) AS bytesTotal FROM ${ctx.ref('crawl', 'pages')} WHERE - date = '${params.date}' ${params.devRankFilter} AND + date = '${params.date}' ${params.devRankFilter} ${params.lense.sql} AND is_root_page AND INT64(summary.bytesTotal) > 0 ) + +SELECT + date, + client, + UNIX_SECONDS(TIMESTAMP(date)) AS timestamp, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(751)] / 1024, 2) AS p75, + ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(901)] / 1024, 2) AS p90 +FROM pages GROUP BY date, client,