Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reviewing CWV report V2 #38

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
7 changes: 6 additions & 1 deletion definitions/declarations/httparchive.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,10 @@ for (const table of stagingTables) {

declare({
schema: 'wappalyzer',
name: 'apps'
name: 'technologies'
})

declare({
schema: 'wappalyzer',
name: 'categories'
})
41 changes: 17 additions & 24 deletions definitions/output/core_web_vitals/technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,16 @@ CREATE TEMP FUNCTION IS_NON_ZERO(
good + needs_improvement + poor > 0
);
`).query(ctx => `
WITH geo_summary AS (
WITH pages AS (
SELECT
client,
page,
technologies
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
), geo_summary AS (
SELECT
CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\\d{4})(\\d{2})', r'\\1-\\2-01') AS DATE) AS date,
* EXCEPT (country_code),
Expand Down Expand Up @@ -94,49 +103,36 @@ crux AS (

technologies AS (
SELECT
technology.technology,
tech.technology,
client,
page
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology
WHERE
date = '${pastMonth}'
${constants.devRankFilter} AND
technology.technology IS NOT NULL AND
technology.technology != ''
FROM pages,
UNNEST(technologies) AS tech
UNION ALL
SELECT
'ALL' AS technology,
client,
page
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
FROM pages
),

categories AS (
SELECT
technology.technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
FROM pages,
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
GROUP BY technology
UNION ALL
SELECT
'ALL' AS technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
FROM pages,
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}' AND
client = 'mobile'
${constants.devRankFilter}
),

summary_stats AS (
Expand All @@ -152,10 +148,7 @@ summary_stats AS (
SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance,
SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa,
SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
FROM pages
),

lab_data AS (
Expand Down
4 changes: 2 additions & 2 deletions definitions/output/reports/cwv_tech_adoption.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ SELECT
rank,
geo,
STRUCT(
COALESCE(MAX(IF(client = 'desktop', origins, NULL))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, NULL))) AS mobile
COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile
) AS adoption
FROM ${ctx.ref('core_web_vitals', 'technologies')}
WHERE date = '${pastMonth}'
Expand Down
23 changes: 16 additions & 7 deletions definitions/output/reports/cwv_tech_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@ WITH pages AS (
client = 'mobile'
${constants.devRankFilter}
), categories AS (
SELECT
name AS category,
description
FROM ${ctx.ref('wappalyzer', 'categories')}
), categories_stats AS (
SELECT
category,
COUNT(DISTINCT root_page) AS origins
FROM pages,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
GROUP BY category
), technologies AS (
), technologies_stats AS (
SELECT
category,
technology,
Expand All @@ -38,13 +43,17 @@ WITH pages AS (
SELECT
category,
categories.origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies
FROM categories
JOIN technologies
description,
categories_stats.origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies_stats.origins DESC) AS technologies
FROM categories_stats
INNER JOIN technologies_stats
USING (category)
LEFT JOIN categories
USING (category)
GROUP BY
category,
categories.origins
ORDER BY categories.origins DESC
description,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sarahfossheim FYI: v1/categories will get this additional `description` field in January.

origins
ORDER BY origins DESC
`)
68 changes: 59 additions & 9 deletions definitions/output/reports/cwv_tech_technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,69 @@ publish('cwv_tech_technologies', {
tags: ['crux_ready']
}).query(ctx => `
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "technologies", "type": "dict"} */
WITH pages AS (
SELECT
client,
root_page AS origin,
tech.technology
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS tech
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
), total_pages AS (
SELECT
client,
COUNT(DISTINCT origin) AS origins
FROM pages
GROUP BY client
), technologies AS (
SELECT
technology,
description,
ARRAY_TO_STRING(categories, ', ') AS category,
categories AS category_obj,
NULL AS similar_technologies
FROM ${ctx.ref('wappalyzer', 'technologies')}
), tech_origins AS (
SELECT
client,
technology,
COUNT(DISTINCT origin) AS origins
FROM pages
GROUP BY
client,
technology
)

SELECT
client,
app AS technology,
technology,
description,
category,
SPLIT(category, ",") AS category_obj,
category_obj,
similar_technologies,
COALESCE(origins, 0) AS origins
FROM tech_origins
INNER JOIN technologies
USING(technology)
ORDER BY origins DESC

UNION ALL

SELECT
client,
'ALL' AS technology,
NULL AS description,
ARRAY_TO_STRING(categories, ', ') AS category,
categories AS category_obj,
NULL AS similar_technologies,
origins
FROM ${ctx.ref('core_web_vitals', 'technologies')}
LEFT JOIN ${ctx.ref('wappalyzer', 'apps')}
ON app = name
WHERE date = '${pastMonth}' AND
geo = 'ALL' AND
rank = 'ALL'
ORDER BY origins DESC
FROM total_pages
CROSS JOIN (
SELECT
ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category) AS categories
FROM technologies,
UNNEST(category_obj) AS category
) AS cat
`)
6 changes: 3 additions & 3 deletions definitions/output/wappalyzer/tech_detections.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ tech_deprecated_gone_origins AS (
-- aggregation of technology adoption/deprecation metrics
SELECT
DATE('${constants.currentMonth}') AS date,
COALESCE(before_summary.technology, tech_adopted_existing_origins.technology, tech_adopted_new_origins.technology, apps.name) AS technology,
COALESCE(before_summary.technology, tech_adopted_existing_origins.technology, tech_adopted_new_origins.technology, technologies.name) AS technology,

-- origins summary
0-COALESCE(total_origins_deprecated_existing, 0) AS total_origins_deprecated_existing,
Expand All @@ -139,6 +139,6 @@ LEFT JOIN tech_deprecated_existing_origins
ON before_summary.technology = tech_deprecated_existing_origins.technology
LEFT JOIN tech_deprecated_gone_origins
ON before_summary.technology = tech_deprecated_gone_origins.technology
FULL OUTER JOIN wappalyzer.apps
ON before_summary.technology = apps.name
FULL OUTER JOIN ${ctx.ref('wappalyzer', 'technologies')} AS technologies
ON before_summary.technology = technologies.name
`)
Loading