From 46b3da6b3aaa23b39772f04c4e4655d2cb682dea Mon Sep 17 00:00:00 2001 From: Ramya Ragupathy Date: Wed, 3 Jul 2019 14:56:31 +0530 Subject: [PATCH 1/4] Replace materialized views with roll up tables --- sql/changesets.sql | 5 ++--- sql/changesets_countries.sql | 4 ++-- sql/changesets_hashtags.sql | 4 ++-- sql/countries.sql | 2 +- sql/hashtag_stats.sql | 2 +- sql/hashtags.sql | 2 +- sql/raw_countries_users.sql | 2 +- sql/raw_hashtags_users.sql | 2 +- sql/user_stats.sql | 2 +- sql/users.sql | 2 +- 10 files changed, 13 insertions(+), 14 deletions(-) diff --git a/sql/changesets.sql b/sql/changesets.sql index 03315a1..53c6601 100644 --- a/sql/changesets.sql +++ b/sql/changesets.sql @@ -1,5 +1,4 @@ --- view with a schema that matches the legacy changesets table -CREATE VIEW changesets AS +CREATE TABLE changesets AS SELECT id, roads_added road_count_add, @@ -16,4 +15,4 @@ CREATE VIEW changesets AS editor, user_id, created_at - FROM raw_changesets; + FROM raw_changesets; \ No newline at end of file diff --git a/sql/changesets_countries.sql b/sql/changesets_countries.sql index 08768e4..91eb55a 100644 --- a/sql/changesets_countries.sql +++ b/sql/changesets_countries.sql @@ -1,5 +1,5 @@ -CREATE VIEW changesets_countries AS +CREATE TABLE changesets_countries AS SELECT changeset_id, country_id - FROM raw_changesets_countries; + FROM raw_changesets_countries; \ No newline at end of file diff --git a/sql/changesets_hashtags.sql b/sql/changesets_hashtags.sql index 6d9e2f3..57770ac 100644 --- a/sql/changesets_hashtags.sql +++ b/sql/changesets_hashtags.sql @@ -1,5 +1,5 @@ -CREATE VIEW changesets_hashtags AS +CREATE TABLE changesets_hashtags AS SELECT changeset_id, hashtag_id - FROM raw_changesets_hashtags; + FROM raw_changesets_hashtags; \ No newline at end of file diff --git a/sql/countries.sql b/sql/countries.sql index a51d4ae..f0fca73 100644 --- a/sql/countries.sql +++ b/sql/countries.sql @@ -1,4 +1,4 @@ -CREATE VIEW countries AS +CREATE TABLE countries AS SELECT id, name, diff --git a/sql/hashtag_stats.sql b/sql/hashtag_stats.sql index 4f16d0c..2a48420 100644 --- a/sql/hashtag_stats.sql +++ b/sql/hashtag_stats.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW hashtag_stats AS +CREATE TABLE hashtag_stats AS SELECT hashtag, count(c.id) changesets, diff --git a/sql/hashtags.sql b/sql/hashtags.sql index 38313f6..cd26580 100644 --- a/sql/hashtags.sql +++ b/sql/hashtags.sql @@ -1,5 +1,5 @@ -- view with a schema that matches the legacy hashtags table -CREATE VIEW hashtags AS +CREATE TABLE hashtags AS SELECT id, hashtag diff --git a/sql/raw_countries_users.sql b/sql/raw_countries_users.sql index da25876..5515b26 100644 --- a/sql/raw_countries_users.sql +++ b/sql/raw_countries_users.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW raw_countries_users AS +CREATE TABLE raw_countries_users AS SELECT country_id, user_id, diff --git a/sql/raw_hashtags_users.sql b/sql/raw_hashtags_users.sql index 1b6957c..030c0a0 100644 --- a/sql/raw_hashtags_users.sql +++ b/sql/raw_hashtags_users.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW raw_hashtags_users AS +CREATE TABLE raw_hashtags_users AS SELECT *, rank() OVER (ORDER BY edits DESC) edits_rank, rank() OVER (ORDER BY buildings DESC) buildings_rank, diff --git a/sql/user_stats.sql b/sql/user_stats.sql index 8119515..9a11dec 100644 --- a/sql/user_stats.sql +++ b/sql/user_stats.sql @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW user_stats AS +CREATE TABLE user_stats AS SELECT user_id, name, diff --git a/sql/users.sql b/sql/users.sql index cfdf541..c0dce9f 100644 --- a/sql/users.sql +++ b/sql/users.sql @@ -1,7 +1,7 @@ CREATE EXTENSION IF NOT EXISTS postgis; -- view with a schema that matches the legacy users table -CREATE VIEW users AS +CREATE TABLE users AS SELECT id, u.name, From de02f69359c5f55829c4ba1a752675f3980172ce Mon Sep 17 00:00:00 2001 From: Ramya Ragupathy Date: Wed, 3 Jul 2019 16:32:02 +0530 Subject: [PATCH 2/4] Refresh -> Insert --- src/housekeeping.js | 86 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/src/housekeeping.js b/src/housekeeping.js index 0dc6f02..054547c 100644 --- a/src/housekeeping.js +++ b/src/housekeeping.js @@ -2,10 +2,88 @@ const env = require("require-env"); const { Pool } = require("pg"); const QUERIES = [ - "REFRESH MATERIALIZED VIEW CONCURRENTLY hashtag_stats", - "REFRESH MATERIALIZED VIEW CONCURRENTLY raw_countries_users", - "REFRESH MATERIALIZED VIEW CONCURRENTLY raw_hashtags_users", - "REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats" + `INSERT INTO hashtag_stats + SELECT + hashtag, + count(c.id) changesets, + count(distinct c.user_id) users, + sum(road_km_added) road_km_added, + sum(road_km_modified) road_km_modified, + sum(waterway_km_added) waterway_km_added, + sum(waterway_km_modified) waterway_km_modified, + sum(roads_added) roads_added, + sum(roads_modified) roads_modified, + sum(waterways_added) waterways_added, + sum(waterways_modified) waterways_modified, + sum(buildings_added) buildings_added, + sum(buildings_modified) buildings_modified, + sum(pois_added) pois_added, + sum(pois_modified) pois_modified, + sum(CASE + WHEN position('josm' in lower(editor)) > 0 THEN 1 + ELSE 0 + END) josm_edits, + max(coalesce(closed_at, created_at)) updated_at + FROM raw_changesets_hashtags ch + JOIN raw_changesets c ON c.id = ch.changeset_id + JOIN raw_hashtags h ON h.id = ch.hashtag_id + GROUP BY hashtag;`, + `INSERT INTO raw_countries_users AS + SELECT + country_id, + user_id, + count(id) changesets + FROM raw_changesets_countries + JOIN raw_changesets ON raw_changesets.id = raw_changesets_countries.changeset_id + GROUP BY country_id, user_id;`, + `INSERT INTO raw_hashtags_users AS + SELECT *, + rank() OVER (ORDER BY edits DESC) edits_rank, + rank() OVER (ORDER BY buildings DESC) buildings_rank, + rank() OVER (ORDER BY road_km DESC) road_km_rank, + rank() OVER (ORDER BY updated_at DESC) updated_at_rank + FROM ( + SELECT + raw_changesets_hashtags.hashtag_id, + raw_changesets.user_id, + count(id) changesets, + sum(buildings_added + buildings_modified + roads_added + roads_modified + waterways_added + waterways_modified + pois_added + pois_modified) edits, + sum(buildings_added + buildings_modified) buildings, + sum(roads_added + roads_modified) roads, + sum(road_km_added + road_km_modified) road_km, + max(coalesce(closed_at, created_at)) updated_at + FROM raw_changesets + JOIN raw_changesets_hashtags ON raw_changesets_hashtags.changeset_id = raw_changesets.id + GROUP BY hashtag_id, user_id + ) AS _;`, + `INSERT INTO user_stats AS + SELECT + user_id, + name, + count(raw_changesets.id) changesets, + sum(road_km_added) road_km_added, + sum(road_km_modified) road_km_modified, + sum(waterway_km_added) waterway_km_added, + sum(waterway_km_modified) waterway_km_modified, + sum(roads_added) roads_added, + sum(roads_modified) roads_modified, + sum(waterways_added) waterways_added, + sum(waterways_modified) waterways_modified, + sum(buildings_added) buildings_added, + sum(buildings_modified) buildings_modified, + sum(pois_added) pois_added, + sum(pois_modified) pois_modified, + sum(CASE + WHEN position('josm' in lower(editor)) > 0 THEN 1 + ELSE 0 + END) josm_edits, + (SELECT count(country_id) FROM raw_countries_users where raw_countries_users.user_id = raw_changesets.user_id) AS countries, + (SELECT count(hashtag_id) FROM raw_hashtags_users where raw_hashtags_users.user_id = raw_changesets.user_id) AS hashtags, + max(coalesce(closed_at, created_at)) updated_at + FROM raw_changesets + JOIN raw_users ON raw_changesets.user_id = raw_users.id + WHERE user_id IS NOT NULL + GROUP BY user_id, name;` ]; const query = async (pool, query) => { From c63119f8d964c23b4197c2ccee8a778f5daae03b Mon Sep 17 00:00:00 2001 From: Ramya Ragupathy Date: Wed, 3 Jul 2019 18:39:47 +0530 Subject: [PATCH 3/4] Handle conflict --- src/housekeeping.js | 67 ++++----------------------------------------- 1 file changed, 5 insertions(+), 62 deletions(-) diff --git a/src/housekeeping.js b/src/housekeeping.js index 054547c..4091e49 100644 --- a/src/housekeeping.js +++ b/src/housekeeping.js @@ -19,71 +19,14 @@ const QUERIES = [ sum(buildings_modified) buildings_modified, sum(pois_added) pois_added, sum(pois_modified) pois_modified, - sum(CASE - WHEN position('josm' in lower(editor)) > 0 THEN 1 - ELSE 0 - END) josm_edits, - max(coalesce(closed_at, created_at)) updated_at + sum(pois_modified) josm_edits, + max(coalesce(closed_at, created_at)) FROM raw_changesets_hashtags ch JOIN raw_changesets c ON c.id = ch.changeset_id JOIN raw_hashtags h ON h.id = ch.hashtag_id - GROUP BY hashtag;`, - `INSERT INTO raw_countries_users AS - SELECT - country_id, - user_id, - count(id) changesets - FROM raw_changesets_countries - JOIN raw_changesets ON raw_changesets.id = raw_changesets_countries.changeset_id - GROUP BY country_id, user_id;`, - `INSERT INTO raw_hashtags_users AS - SELECT *, - rank() OVER (ORDER BY edits DESC) edits_rank, - rank() OVER (ORDER BY buildings DESC) buildings_rank, - rank() OVER (ORDER BY road_km DESC) road_km_rank, - rank() OVER (ORDER BY updated_at DESC) updated_at_rank - FROM ( - SELECT - raw_changesets_hashtags.hashtag_id, - raw_changesets.user_id, - count(id) changesets, - sum(buildings_added + buildings_modified + roads_added + roads_modified + waterways_added + waterways_modified + pois_added + pois_modified) edits, - sum(buildings_added + buildings_modified) buildings, - sum(roads_added + roads_modified) roads, - sum(road_km_added + road_km_modified) road_km, - max(coalesce(closed_at, created_at)) updated_at - FROM raw_changesets - JOIN raw_changesets_hashtags ON raw_changesets_hashtags.changeset_id = raw_changesets.id - GROUP BY hashtag_id, user_id - ) AS _;`, - `INSERT INTO user_stats AS - SELECT - user_id, - name, - count(raw_changesets.id) changesets, - sum(road_km_added) road_km_added, - sum(road_km_modified) road_km_modified, - sum(waterway_km_added) waterway_km_added, - sum(waterway_km_modified) waterway_km_modified, - sum(roads_added) roads_added, - sum(roads_modified) roads_modified, - sum(waterways_added) waterways_added, - sum(waterways_modified) waterways_modified, - sum(buildings_added) buildings_added, - sum(buildings_modified) buildings_modified, - sum(pois_added) pois_added, - sum(pois_modified) pois_modified, - sum(CASE - WHEN position('josm' in lower(editor)) > 0 THEN 1 - ELSE 0 - END) josm_edits, - (SELECT count(country_id) FROM raw_countries_users where raw_countries_users.user_id = raw_changesets.user_id) AS countries, - (SELECT count(hashtag_id) FROM raw_hashtags_users where raw_hashtags_users.user_id = raw_changesets.user_id) AS hashtags, - max(coalesce(closed_at, created_at)) updated_at - FROM raw_changesets - JOIN raw_users ON raw_changesets.user_id = raw_users.id - WHERE user_id IS NOT NULL - GROUP BY user_id, name;` + GROUP BY hashtag + ON CONFLICT do nothing;` + ]; const query = async (pool, query) => { From c0a00b79e833e5cd4153cd4a8b3a7d730f3873e9 Mon Sep 17 00:00:00 2001 From: Ramya Ragupathy Date: Tue, 6 Aug 2019 10:18:59 +0530 Subject: [PATCH 4/4] On conflict action --- src/housekeeping.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/housekeeping.js b/src/housekeeping.js index 4091e49..d42bc15 100644 --- a/src/housekeeping.js +++ b/src/housekeeping.js @@ -25,7 +25,7 @@ const QUERIES = [ JOIN raw_changesets c ON c.id = ch.changeset_id JOIN raw_hashtags h ON h.id = ch.hashtag_id GROUP BY hashtag - ON CONFLICT do nothing;` + ON CONFLICT do update;` ];