From edd3dba2bbd99b3165f74e09f29467ea1d65b64f Mon Sep 17 00:00:00 2001 From: Martin Aceto Date: Wed, 25 Oct 2023 17:17:35 -0400 Subject: [PATCH] fixes on migration scripts --- .gitignore | 3 ++- scripts/script_adoption.py | 4 ++-- scripts/script_categories.py | 5 +++-- scripts/script_core_web_vitals.py | 10 ++++++---- scripts/script_lighthouse.py | 26 +++++++++++++++++++------- 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 1aff640..ec3cc63 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,5 @@ terraform.rc __pycache__ .pytest_cache -utils.txt \ No newline at end of file +utils.txt +logs \ No newline at end of file diff --git a/scripts/script_adoption.py b/scripts/script_adoption.py index 411eb24..fd14366 100644 --- a/scripts/script_adoption.py +++ b/scripts/script_adoption.py @@ -42,12 +42,12 @@ def execute_query_and_insert_result(start_date, end_date): ))) AS adoption FROM `httparchive.core_web_vitals.technologies` - WHERE + """ # Construct the WHERE clause based on the provided parameters if start_date and end_date: - query += f" date >= '{start_date}' AND date <= '{end_date}'" + query += f"WHERE date >= '{start_date}' AND date <= '{end_date}'" query += " GROUP BY date, app, rank, geo" diff --git a/scripts/script_categories.py b/scripts/script_categories.py index f99467e..3ddc958 100644 --- a/scripts/script_categories.py +++ b/scripts/script_categories.py @@ -77,12 +77,13 @@ def execute_query_and_insert_result(start_date, end_date): # Create a new Firestore document for each result and insert it into the "technologies" collection collection_ref = firestore_client.collection('categories') - print(results) + + print("Data inserted started.") for row in results: item = dict(row.items()) - print(item) + #print(item) doc_ref = collection_ref.document() doc_ref.set(item) diff --git a/scripts/script_core_web_vitals.py b/scripts/script_core_web_vitals.py index 3aa65bf..d27ae62 100644 --- a/scripts/script_core_web_vitals.py +++ b/scripts/script_core_web_vitals.py @@ -95,12 +95,12 @@ def execute_query_and_insert_result(start_date, end_date): ))) AS vitals FROM `httparchive.core_web_vitals.technologies` - WHERE + """ # Construct the WHERE clause based on the provided parameters if start_date and end_date: - query += f" date >= '{start_date}' AND date <= '{end_date}'" + query += f"WHERE date >= '{start_date}' AND date <= '{end_date}'" query += " GROUP BY date, app, rank, geo" @@ -110,13 +110,15 @@ def execute_query_and_insert_result(start_date, end_date): # Create a new Firestore document for each result and insert it into the "technologies" collection collection_ref = firestore_client.collection('core_web_vitals') - print(results) + #print(results) + + print("Data inserted started.") for row in results: item = dict(row.items()) item['date'] = str(row['date']) - print(item) + #print(item) doc_ref = collection_ref.document() doc_ref.set(item) diff --git a/scripts/script_lighthouse.py b/scripts/script_lighthouse.py index b912359..114e357 100644 --- a/scripts/script_lighthouse.py +++ b/scripts/script_lighthouse.py @@ -1,4 +1,5 @@ import sys +import uuid from google.cloud import bigquery from google.cloud import firestore from decimal import Decimal @@ -97,21 +98,32 @@ def execute_query_and_insert_result(start_date, end_date): query_job = bq_client.query(query) results = query_job.result() - # Create a new Firestore document for each result and insert it into the "technologies" collection collection_ref = firestore_client.collection('lighthouse') - print(results) - for row in results: + idx = 0 + + print("Data insert process started.") + + batch = collection_ref.batch() + for row in results: + # Convert date + # item = dict(row.items()) item['date'] = str(row['date']) - item = convert_decimal_to_float(item) - print(item) + record_ref = collection_ref.document(uuid.uuid4().hex) + batch.set(record_ref, row) + idx += 1 - doc_ref = collection_ref.document() - doc_ref.set(item) + # Commit the batch at every 500th record. + if idx == 499: + batch.commit() + # Start a new batch for the next iteration. + batch = collection_ref.batch() + idx = 0 + batch.commit() print("Data inserted into Firestore successfully.") # Get command-line arguments