-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* meta reports * format * page level changes * lint * include all_dev * ref pages * blink_features tables * features from partitions meta * files renamed * lint * cleanup * dependencies as option * migrate to crawl * update after migration * table updated * table renamed * wappalyzer joined
- Loading branch information
1 parent
12e95b1
commit 5f0c2ed
Showing
2 changed files
with
128 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
const pastMonth = constants.fnPastMonth(constants.currentMonth) | ||
|
||
publish('tech_detections', { | ||
type: 'table', | ||
description: 'Used in dashboard: https://lookerstudio.google.com/u/7/reporting/1jh_ScPlCIbSYTf2r2Y6EftqmX9SQy4Gn/page/p_an38lbzywc/edit', | ||
schema: 'wappalyzer', | ||
tags: ['crawl_results'] | ||
}).query(ctx => ` | ||
WITH source AS ( | ||
SELECT DISTINCT | ||
date, | ||
root_page AS page, | ||
tech.technology | ||
FROM ${ctx.ref('crawl', 'pages')}, | ||
UNNEST(technologies) AS tech | ||
WHERE date >= "${pastMonth}" ${constants.devRankFilter} | ||
), | ||
-- Technology in the previous month (August) | ||
tech_before AS ( | ||
SELECT | ||
page, | ||
technology | ||
FROM source | ||
WHERE date = "${pastMonth}" | ||
), | ||
-- Technology in the current month (September) | ||
tech_current AS ( | ||
SELECT | ||
page, | ||
technology | ||
FROM source | ||
WHERE date = "${constants.currentMonth}" | ||
), | ||
-- Summary of technology and categories per page in the previous month | ||
tech_before_summary AS ( | ||
SELECT | ||
technology, | ||
COUNT(DISTINCT page) AS total_pages_before | ||
FROM tech_before | ||
GROUP BY technology | ||
), | ||
-- Pages that existed last month but introduced the technology in the current month | ||
tech_introduced_existing_pages AS ( | ||
SELECT | ||
tech_current.technology, | ||
COUNT(DISTINCT tech_current.page) AS total_pages_introduced_existing, | ||
STRING_AGG(DISTINCT tech_current.page LIMIT 5) AS sample_pages_introduced_existing | ||
FROM tech_current | ||
JOIN tech_before | ||
USING (page) | ||
LEFT JOIN tech_before AS tb | ||
ON tech_current.page = tb.page AND tech_current.technology = tb.technology | ||
WHERE tb.page IS NULL -- Technology was not detected last month | ||
GROUP BY tech_current.technology | ||
), | ||
-- Pages that were not in the dataset last month but appeared this month with the technology | ||
tech_introduced_new_pages AS ( | ||
SELECT | ||
tech_current.technology, | ||
COUNT(DISTINCT tech_current.page) AS total_pages_introduced_new, | ||
STRING_AGG(DISTINCT tech_current.page LIMIT 5) AS sample_pages_introduced_new | ||
FROM tech_current | ||
LEFT JOIN tech_before | ||
USING (page) | ||
WHERE tech_before.page IS NULL -- Page was not present last month | ||
GROUP BY tech_current.technology | ||
), | ||
-- Pages that existed this month but no longer have the technology | ||
tech_deprecated_existing_pages AS ( | ||
SELECT | ||
tech_before.technology, | ||
COUNT(DISTINCT tech_before.page) AS total_pages_deprecated_existing, | ||
STRING_AGG(DISTINCT tech_before.page LIMIT 5) AS sample_pages_deprecated_existing | ||
FROM tech_before | ||
JOIN tech_current | ||
USING (page) | ||
LEFT JOIN tech_current AS tc | ||
ON tech_before.page = tc.page AND tech_before.technology = tc.technology | ||
WHERE tc.page IS NULL -- Technology is not detected in the current month | ||
GROUP BY tech_before.technology | ||
), | ||
-- Pages that no longer exist in the current dataset | ||
tech_deprecated_gone_pages AS ( | ||
SELECT | ||
tech_before.technology, | ||
COUNT(DISTINCT tech_before.page) AS total_pages_deprecated_gone, | ||
STRING_AGG(DISTINCT tech_before.page LIMIT 5) AS sample_pages_deprecated_gone | ||
FROM tech_before | ||
LEFT JOIN tech_current | ||
USING (page) | ||
WHERE tech_current.page IS NULL -- Page no longer exists in current dataset | ||
GROUP BY tech_before.technology | ||
) | ||
-- Final aggregation and comparison of technology adoption/deprecation metrics | ||
SELECT | ||
COALESCE(before_summary.technology, tech_introduced_existing_pages.technology, tech_introduced_new_pages.technology, apps.name) AS technology, | ||
-- Pages summary | ||
0-COALESCE(total_pages_deprecated_existing, 0) AS total_pages_deprecated_existing, | ||
0-COALESCE(total_pages_deprecated_gone, 0) AS total_pages_deprecated_gone, | ||
COALESCE(total_pages_before, 0) - COALESCE(total_pages_deprecated_existing, 0) - COALESCE(total_pages_deprecated_gone, 0) AS total_pages_persisted, | ||
COALESCE(total_pages_introduced_existing, 0) AS total_pages_introduced_existing, | ||
COALESCE(total_pages_introduced_new, 0) AS total_pages_introduced_new, | ||
-- Sample pages | ||
COALESCE(sample_pages_deprecated_existing, "") AS sample_pages_deprecated_existing, | ||
COALESCE(sample_pages_deprecated_gone, "") AS sample_pages_deprecated_gone, | ||
COALESCE(tech_introduced_existing_pages.sample_pages_introduced_existing, "") AS sample_pages_introduced_existing, | ||
COALESCE(tech_introduced_new_pages.sample_pages_introduced_new, "") AS sample_pages_introduced_new | ||
FROM tech_before_summary before_summary | ||
FULL OUTER JOIN tech_introduced_existing_pages | ||
ON before_summary.technology = tech_introduced_existing_pages.technology | ||
FULL OUTER JOIN tech_introduced_new_pages | ||
ON before_summary.technology = tech_introduced_new_pages.technology | ||
LEFT JOIN tech_deprecated_existing_pages | ||
ON before_summary.technology = tech_deprecated_existing_pages.technology | ||
LEFT JOIN tech_deprecated_gone_pages | ||
ON before_summary.technology = tech_deprecated_gone_pages.technology | ||
FULL OUTER JOIN wappalyzer.apps | ||
ON before_summary.technology = apps.name | ||
ORDER BY total_pages_persisted DESC | ||
`) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters