Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reviewing CWV report V2 #38

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
7 changes: 6 additions & 1 deletion definitions/declarations/httparchive.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,10 @@ for (const table of stagingTables) {

declare({
schema: 'wappalyzer',
name: 'apps'
name: 'technologies'
})

// Declaration for the `categories` table in the `wappalyzer` schema, so that
// queries can reference it with ctx.ref('wappalyzer', 'categories').
declare({
schema: 'wappalyzer',
name: 'categories'
})
199 changes: 155 additions & 44 deletions definitions/output/core_web_vitals/technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,105 @@ CREATE TEMP FUNCTION IS_NON_ZERO(
) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);

-- Extracts, from a Lighthouse report, the performance audits of the
-- "diagnostics" group that are scored and use the 'metricSavings' display mode.
--
-- Args:
--   lighthouse: Lighthouse report as JSON (reads categories.performance.auditRefs
--               and audits).
-- Returns:
--   NULL when the report has no performance audit refs; otherwise one struct per
--   matching audit:
--     id            - audit id
--     savings_ms    - details.overallSavingsMs, else numericValue when
--                     numericUnit is 'millisecond', else NULL
--     savings_bytes - details.overallSavingsBytes, else numericValue when
--                     numericUnit is 'byte', else NULL
CREATE TEMP FUNCTION extract_audits (lighthouse JSON)
RETURNS ARRAY<STRUCT<
  id STRING,
  savings_ms INT64,
  savings_bytes INT64
>>
LANGUAGE js AS """
  // Optional chaining all the way down: a report without a performance
  // category yields NULL instead of throwing.
  const auditRefs = lighthouse?.categories?.performance?.auditRefs
  if (!auditRefs) {
    return null
  }

  const diagnosticIds = new Set(
    auditRefs
      .filter((ref) => ref.group === "diagnostics")
      .map((ref) => ref.id)
  )

  // The declared struct fields are INT64, so round fractional Lighthouse values
  // to whole numbers; anything that is not a finite number becomes NULL.
  const toInt = (value) =>
    typeof value === 'number' && Number.isFinite(value) ? Math.round(value) : null

  const results = []
  for (const audit of Object.values(lighthouse.audits || {})) {
    if (
      !diagnosticIds.has(audit.id) ||
      audit.score === null ||
      audit.scoreDisplayMode !== 'metricSavings'
    ) {
      continue
    }

    const details = audit.details || {}

    // The parentheses matter: without them the expression parses as
    // (a || b) ? numericValue : null, which discards the overallSavings*
    // value and reports numericValue even when its unit does not match.
    const savingsMs = details.overallSavingsMs ||
      (audit.numericUnit === 'millisecond' ? audit.numericValue : null)
    const savingsBytes = details.overallSavingsBytes ||
      (audit.numericUnit === 'byte' ? audit.numericValue : null)

    results.push({
      id: audit.id,
      savings_ms: toInt(savingsMs),
      savings_bytes: toInt(savingsBytes),
    })
  }
  return results
""";
`).query(ctx => `
WITH geo_summary AS (
WITH pages AS (
SELECT
CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\\d{4})(\\d{2})', r'\\1-\\2-01') AS DATE) AS date,
* EXCEPT (country_code),
\`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo
client,
page,
root_page AS origin,
technologies,
summary,
lighthouse
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
), geo_summary AS (
SELECT
\`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo,
rank,
device,
origin,
avg_fcp,
avg_fid,
avg_inp,
avg_lcp,
avg_ttfb,
fast_fcp,
fast_fid,
fast_inp,
fast_lcp,
fast_ttfb,
slow_fcp,
slow_fid,
slow_inp,
slow_lcp,
slow_ttfb,
small_cls,
medium_cls,
large_cls
FROM ${ctx.ref('chrome-ux-report', 'materialized', 'country_summary')}
WHERE
yyyymm = CAST(FORMAT_DATE('%Y%m', '${pastMonth}') AS INT64) AND
device IN ('desktop', 'phone')
UNION ALL

UNION ALL

SELECT
* EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin),
'ALL' AS geo
'ALL' AS geo,
rank,
device,
origin,
avg_fcp,
avg_fid,
avg_inp,
avg_lcp,
avg_ttfb,
fast_fcp,
fast_fid,
fast_inp,
fast_lcp,
fast_ttfb,
slow_fcp,
slow_fid,
slow_inp,
slow_lcp,
slow_ttfb,
small_cls,
medium_cls,
large_cls
FROM ${ctx.ref('chrome-ux-report', 'materialized', 'device_summary')}
WHERE
date = '${pastMonth}' AND
Expand All @@ -61,7 +146,7 @@ crux AS (
WHEN 10000 THEN 'Top 10k'
WHEN 1000 THEN 'Top 1k'
END AS rank,
CONCAT(origin, '/') AS root_page,
CONCAT(origin, '/') AS origin,
IF(device = 'desktop', 'desktop', 'mobile') AS client,

# CWV
Expand Down Expand Up @@ -92,77 +177,95 @@ crux AS (
WHERE rank <= _rank
),

/*
audits AS (
SELECT
client,
page,
performance_opportunities.id
FROM pages,
UNNEST(extract_audits(lighthouse)) AS performance_opportunities
WHERE
performance_opportunities.savings_ms > 0 OR
performance_opportunities.savings_bytes > 0
),
*/

technologies AS (
SELECT
technology.technology,
tech.technology,
REGEXP_EXTRACT_ALL(version, r'(0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)')[SAFE_OFFSET(0)] AS version,
client,
page
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology
WHERE
date = '${pastMonth}'
${constants.devRankFilter} AND
technology.technology IS NOT NULL AND
technology.technology != ''
UNION ALL
FROM pages,
UNNEST(technologies) AS tech,
UNNEST(tech.info) AS version
WHERE REGEXP_EXTRACT_ALL(version, r'(0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)')[SAFE_OFFSET(0)] IS NOT NULL

UNION ALL

SELECT
tech.technology,
'ALL' AS version,
client,
page
FROM pages,
UNNEST(technologies) AS tech

UNION ALL

SELECT
'ALL' AS technology,
'ALL' AS version,
client,
page
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
FROM pages
),

categories AS (
SELECT
technology.technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
FROM pages,
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
GROUP BY technology
UNION ALL

UNION ALL

SELECT
'ALL' AS technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
FROM pages,
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}' AND
client = 'mobile'
${constants.devRankFilter}
),

summary_stats AS (
lab_metrics AS (
SELECT
client,
page,
root_page AS root_page,
origin,
SAFE.INT64(summary.bytesTotal) AS bytesTotal,
SAFE.INT64(summary.bytesJS) AS bytesJS,
SAFE.INT64(summary.bytesImg) AS bytesImg,
SAFE.FLOAT64(lighthouse.categories.accessibility.score) AS accessibility,
SAFE.FLOAT64(lighthouse.categories['best-practices'].score) AS best_practices,
SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance,
SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa,
SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo,
extract_audits(lighthouse) AS performance_opportunities,
FROM pages
),

lab_data AS (
SELECT
client,
root_page,
origin,
technology,
version,
ANY_VALUE(category) AS category,
AVG(bytesTotal) AS bytesTotal,
AVG(bytesJS) AS bytesJS,
Expand All @@ -172,15 +275,16 @@ lab_data AS (
AVG(performance) AS performance,
AVG(pwa) AS pwa,
AVG(seo) AS seo
FROM summary_stats
JOIN technologies
FROM lab_metrics
INNER JOIN technologies
USING (client, page)
JOIN categories
INNER JOIN categories
USING (technology)
GROUP BY
client,
root_page,
technology
origin,
technology,
version
)

SELECT
Expand All @@ -189,8 +293,9 @@ SELECT
rank,
ANY_VALUE(category) AS category,
technology AS app,
version,
client,
COUNT(0) AS origins,
COUNT(DISTINCT origin) AS origins,

# CrUX data
COUNTIF(good_fid) AS origins_with_good_fid,
Expand Down Expand Up @@ -220,16 +325,22 @@ SELECT
SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_pwa,
SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_seo,

SAFE_DIVIDE(COUNTIF(accessibility >= 0.9), COUNTIF(accessibility > 0)) AS lighthouse_score_accessibility_pass_rate,
SAFE_DIVIDE(COUNTIF(best_practices >= 0.9), COUNTIF(best_practices > 0)) AS lighthouse_score_best_practices_pass_rate,
SAFE_DIVIDE(COUNTIF(performance >= 0.9), COUNTIF(performance > 0)) AS lighthouse_score_performance_pass_rate,
SAFE_DIVIDE(COUNTIF(seo >= 0.9), COUNTIF(seo > 0)) AS lighthouse_score_seo_pass_rate,

# Page weight stats
SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS median_bytes_total,
SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS median_bytes_js,
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image

FROM lab_data
INNER JOIN crux
USING (client, root_page)
USING (client, origin)
GROUP BY
app,
version,
geo,
rank,
client
Expand Down
4 changes: 2 additions & 2 deletions definitions/output/reports/cwv_tech_adoption.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ SELECT
rank,
geo,
STRUCT(
COALESCE(MAX(IF(client = 'desktop', origins, NULL))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, NULL))) AS mobile
COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile
) AS adoption
FROM ${ctx.ref('core_web_vitals', 'technologies')}
WHERE date = '${pastMonth}'
Expand Down
23 changes: 16 additions & 7 deletions definitions/output/reports/cwv_tech_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@ WITH pages AS (
client = 'mobile'
${constants.devRankFilter}
), categories AS (
SELECT
name AS category,
description
FROM ${ctx.ref('wappalyzer', 'categories')}
), categories_stats AS (
SELECT
category,
COUNT(DISTINCT root_page) AS origins
FROM pages,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
GROUP BY category
), technologies AS (
), technologies_stats AS (
SELECT
category,
technology,
Expand All @@ -38,13 +43,17 @@ WITH pages AS (
SELECT
category,
categories.origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies
FROM categories
JOIN technologies
description,
categories_stats.origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies_stats.origins DESC) AS technologies
FROM categories_stats
INNER JOIN technologies_stats
USING (category)
LEFT JOIN categories
USING (category)
GROUP BY
category,
categories.origins
ORDER BY categories.origins DESC
description,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sarahfossheim FYI v1/categories will get this additional field in January.

origins
ORDER BY origins DESC
`)
Loading
Loading