From 536fcd3a591633daf6c9d287f449a872608da94c Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 14:31:35 +0300 Subject: [PATCH 01/13] Update dashboard chart query --- src/analytics/api/models/events.py | 6 ++---- src/analytics/api/utils/data_formatters.py | 25 ++++++++++++++++++++++ src/analytics/api/views/dashboard.py | 11 +++------- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/analytics/api/models/events.py b/src/analytics/api/models/events.py index 4ac9e2febb..c17c32fa21 100644 --- a/src/analytics/api/models/events.py +++ b/src/analytics/api/models/events.py @@ -413,6 +413,7 @@ def download_from_bigquery( frequency (str): Data frequency (e.g., 'raw', 'daily', 'hourly'). pollutants (list): List of pollutants to include in the data. data_type (str): Type of data ('raw' or 'aggregated'). + filter_columns(list) weather_fields (list): List of weather fields to retrieve. Returns: @@ -1273,9 +1274,7 @@ def get_d3_chart_events(self, sites, start_date, end_date, pollutant, frequency) ) @cache.memoize() - def get_d3_chart_events_v2( - self, sites, start_date, end_date, pollutant, frequency, tenant - ): + def get_d3_chart_events_v2(self, sites, start_date, end_date, pollutant, frequency): if pollutant not in ["pm2_5", "pm10", "no2", "pm1"]: raise Exception("Invalid pollutant") @@ -1293,7 +1292,6 @@ def get_d3_chart_events_v2( JOIN {self.BIGQUERY_SITES} ON {self.BIGQUERY_SITES}.id = {self.BIGQUERY_EVENTS}.site_id WHERE {self.BIGQUERY_EVENTS}.timestamp >= '{start_date}' AND {self.BIGQUERY_EVENTS}.timestamp <= '{end_date}' - AND {self.BIGQUERY_EVENTS}.tenant = '{tenant}' AND `airqo-250220.metadata.sites`.id in UNNEST({sites}) """ diff --git a/src/analytics/api/utils/data_formatters.py b/src/analytics/api/utils/data_formatters.py index bace9559de..0c6fb6b377 100644 --- a/src/analytics/api/utils/data_formatters.py +++ b/src/analytics/api/utils/data_formatters.py @@ -328,6 +328,31 @@ def filter_non_private_sites(filter_type: str, sites: List[str]) -> Dict[str, An logger.exception(f"Error while filtering non private devices {rex}") +def validate_network(self, network_name: str) -> bool: + """ + Validate if a given network name exists in the list of networks. + + Args: + network_name (str): The name of the network to validate. + + Returns: + bool: True if the network name exists, False otherwise. + """ + if not network_name: + return False + + endpoint: str = "/users/networks" + airqo_requests = AirQoRequests() + response = airqo_requests.request(endpoint=endpoint, method="get") + + if response and "networks" in response: + networks = response["networks"] + # TODO Could add an active network filter + return any(network.get("net_name") == network_name for network in networks) + + return False + + def filter_non_private_devices(filter_type: str, devices: List[str]) -> Dict[str, Any]: """ FilterS out private device IDs from a provided array of device IDs. diff --git a/src/analytics/api/views/dashboard.py b/src/analytics/api/views/dashboard.py index 62d31bc01e..3e4429495e 100644 --- a/src/analytics/api/views/dashboard.py +++ b/src/analytics/api/views/dashboard.py @@ -206,7 +206,6 @@ def _get_validated_filter(self, json_data): return filter_type, validated_data, error_message def post(self): - tenant = request.args.get("tenant", "airqo") json_data = request.get_json() @@ -219,20 +218,16 @@ def post(self): except Exception as e: logger.exception(f"An error has occured; {e}") - sites = filter_non_private_sites("sites", json_data.get("sites", {})).get( - "sites", [] - ) - start_date = json_data["startDate"] end_date = json_data["endDate"] frequency = json_data["frequency"] pollutant = json_data["pollutant"] chart_type = json_data["chartType"] - events_model = EventsModel(tenant) - # data = events_model.get_d3_chart_events(sites, start_date, end_date, pollutant, frequency) + events_model = EventsModel("airqo") + data = events_model.get_d3_chart_events_v2( - filter_value, start_date, end_date, pollutant, frequency, tenant + filter_value, start_date, end_date, pollutant, frequency ) if chart_type.lower() == "pie": From 370f4583d4297e2b2e395cc3bb5fc870dfb1e50a Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 14:43:14 +0300 Subject: [PATCH 02/13] Drop timestamp column --- src/analytics/api/views/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/analytics/api/views/data.py b/src/analytics/api/views/data.py index 6038e80497..07ef72c0a2 100644 --- a/src/analytics/api/views/data.py +++ b/src/analytics/api/views/data.py @@ -166,6 +166,7 @@ def post(self): data_frame.drop( columns=[ "site_id", + "timestamp", ], inplace=True, ) From 368c59d9af60fbf432b807b668c9d82d491f3bd8 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 17:22:49 +0300 Subject: [PATCH 03/13] Handle non calibrated values --- src/analytics/api/models/events.py | 55 +++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/src/analytics/api/models/events.py b/src/analytics/api/models/events.py index c17c32fa21..a3883752f2 100644 --- a/src/analytics/api/models/events.py +++ b/src/analytics/api/models/events.py @@ -439,14 +439,11 @@ def download_from_bigquery( weather_columns = [] for pollutant in pollutants: - if pollutant == "raw": - key = pollutant - else: - key = f"{pollutant}_{data_type}" - + key = f"{pollutant}_{data_type}" pollutant_mapping = BIGQUERY_FREQUENCY_MAPPER.get(frequency, {}).get( key, [] ) + pollutant_columns.extend( cls.get_columns( cls, @@ -460,6 +457,10 @@ def download_from_bigquery( # TODO Clean up by use using `get_columns` helper method if pollutant in {"pm2_5", "pm10", "no2"}: + if data_type == "raw": + # Add dummy column to fix union column number missmatch. + bam_pollutant_columns.append("-1 as pm2_5") + if frequency in ["weekly", "monthly", "yearly"]: bam_pollutant_columns.extend( [f"ROUND(AVG({pollutant}), {decimal_places}) AS {key}_value"] @@ -468,6 +469,7 @@ def download_from_bigquery( bam_pollutant_columns.extend( [f"ROUND({pollutant}, {decimal_places}) AS {key}_value"] ) + # TODO Fix query when weather data is included. Currently failing if weather_fields: for field in weather_fields: @@ -537,12 +539,55 @@ def download_from_bigquery( drop_columns.append("datetime") sorting_cols.append("datetime") + if data_type == "raw": + cls.simple_data_cleaning(dataframe) + dataframe.drop_duplicates(subset=drop_columns, inplace=True, keep="first") dataframe.sort_values(sorting_cols, ascending=True, inplace=True) dataframe["frequency"] = frequency dataframe = dataframe.replace(np.nan, None) return dataframe + @classmethod + def simple_data_cleaning(cls, data: pd.DataFrame) -> pd.DataFrame: + """ + Perform data cleaning on a pandas DataFrame to handle specific conditions + related to "pm2_5" and "pm2_5_raw_value" columns. + + The cleaning process includes: + 1. Ensuring correct numeric data types for "pm2_5" and "pm2_5_raw_value". + 2. Removing "pm2_5" values where "pm2_5_raw_value" has data. + 3. Dropping the "pm2_5_raw_value" column if it has no data at all. + 4. Retaining "pm2_5" values where "pm2_5_raw_value" has no data, and removing + "pm2_5" values where "pm2_5_raw_value" has data. + 5. Dropping any column (including "pm2_5" and "pm2_5_raw_value") if it is + entirely empty. + + Args: + cls: Class reference (used in classmethods). + data (pd.DataFrame): Input pandas DataFrame with "pm2_5" and + "pm2_5_raw_value" columns. + + Returns: + pd.DataFrame: Cleaned DataFrame with updates applied in place. + + """ + data["pm2_5_raw_value"] = pd.to_numeric( + data["pm2_5_raw_value"], errors="coerce" + ) + data["pm2_5"] = pd.to_numeric(data["pm2_5"], errors="coerce") + + data.loc[~data["pm2_5_raw_value"].isna(), "pm2_5"] = np.nan + + if data["pm2_5_raw_value"].isna().all(): + data.drop(columns=["pm2_5_raw_value"], inplace=True) + + data["pm2_5"] = data["pm2_5"].where(data["pm2_5_raw_value"].isna(), np.nan) + + data.dropna(how="all", axis=1, inplace=True) + + return data + @classmethod def data_export_query( cls, From c1cf65941d5aebd63f1f769305b52df8eb6f4e78 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 17:23:38 +0300 Subject: [PATCH 04/13] Clean up --- src/analytics/api/utils/pollutants/pm_25.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/analytics/api/utils/pollutants/pm_25.py b/src/analytics/api/utils/pollutants/pm_25.py index 6e3f2a986b..9c16abf988 100644 --- a/src/analytics/api/utils/pollutants/pm_25.py +++ b/src/analytics/api/utils/pollutants/pm_25.py @@ -56,19 +56,14 @@ COMMON_POLLUTANT_MAPPING = { "pm2_5_calibrated": ["pm2_5_calibrated_value"], - "pm2_5_raw": ["pm2_5_raw_value"], + "pm2_5_raw": ["pm2_5_raw_value", "pm2_5"], "pm10_calibrated": ["pm10_calibrated_value"], - "pm10_raw": ["pm10_raw_value"], + "pm10_raw": ["pm10_raw_value", "pm10"], "no2_calibrated": ["no2_calibrated_value"], "no2_raw": ["no2_raw_value"], } BIGQUERY_FREQUENCY_MAPPER = { - "raw": { - "pm2_5": ["pm2_5", "s1_pm2_5", "s2_pm2_5"], - "pm10": ["pm10", "s1_pm10", "s2_pm10"], - "no2": ["no2"], - }, "daily": COMMON_POLLUTANT_MAPPING, "hourly": COMMON_POLLUTANT_MAPPING, "weekly": COMMON_POLLUTANT_MAPPING, From 84577f29ddfb0d3144219c7886e3a1b5be8ae4fb Mon Sep 17 00:00:00 2001 From: Nicholas Bob Date: Tue, 10 Dec 2024 17:44:02 +0300 Subject: [PATCH 05/13] Update data_formatters.py Clean up --- src/analytics/api/utils/data_formatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analytics/api/utils/data_formatters.py b/src/analytics/api/utils/data_formatters.py index 0c6fb6b377..d1f5eda567 100644 --- a/src/analytics/api/utils/data_formatters.py +++ b/src/analytics/api/utils/data_formatters.py @@ -328,7 +328,7 @@ def filter_non_private_sites(filter_type: str, sites: List[str]) -> Dict[str, An logger.exception(f"Error while filtering non private devices {rex}") -def validate_network(self, network_name: str) -> bool: +def validate_network(network_name: str) -> bool: """ Validate if a given network name exists in the list of networks. From ec8123a62ff7bcae81d700654dcc26c3d5395eb4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:21:55 +0300 Subject: [PATCH 06/13] Update AirQo exceedance production image tag to prod-ae2e15a3-1733844050 --- k8s/exceedance/values-prod-airqo.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/exceedance/values-prod-airqo.yaml b/k8s/exceedance/values-prod-airqo.yaml index 8fd10c0c0c..8eaf5c0da7 100644 --- a/k8s/exceedance/values-prod-airqo.yaml +++ b/k8s/exceedance/values-prod-airqo.yaml @@ -4,6 +4,6 @@ app: configmap: env-exceedance-production image: repository: eu.gcr.io/airqo-250220/airqo-exceedance-job - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 nameOverride: '' fullnameOverride: '' From e78124c360b232c9575479730213618777982c52 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:22:02 +0300 Subject: [PATCH 07/13] Update KCCA exceedance production image tag to prod-ae2e15a3-1733844050 --- k8s/exceedance/values-prod-kcca.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/exceedance/values-prod-kcca.yaml b/k8s/exceedance/values-prod-kcca.yaml index 72030a5345..afea05a6f6 100644 --- a/k8s/exceedance/values-prod-kcca.yaml +++ b/k8s/exceedance/values-prod-kcca.yaml @@ -4,6 +4,6 @@ app: configmap: env-exceedance-production image: repository: eu.gcr.io/airqo-250220/kcca-exceedance-job - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 nameOverride: '' fullnameOverride: '' From 4280b7ed7b477ca1ddbe2a75f6a1b49816a2e1c7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:22:31 +0300 Subject: [PATCH 08/13] Update device registry staging image tag to stage-c1e369be-1733844028 --- k8s/device-registry/values-stage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/device-registry/values-stage.yaml b/k8s/device-registry/values-stage.yaml index c3d8dccf77..54b83d55b0 100644 --- a/k8s/device-registry/values-stage.yaml +++ b/k8s/device-registry/values-stage.yaml @@ -6,7 +6,7 @@ app: replicaCount: 2 image: repository: eu.gcr.io/airqo-250220/airqo-stage-device-registry-api - tag: stage-dfe6eb16-1733832983 + tag: stage-c1e369be-1733844028 nameOverride: '' fullnameOverride: '' podAnnotations: {} From a7b43476b587fa030f593b56d238af84daef2027 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:22:48 +0300 Subject: [PATCH 09/13] Update device registry production image tag to prod-ae2e15a3-1733844050 --- k8s/device-registry/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/device-registry/values-prod.yaml b/k8s/device-registry/values-prod.yaml index b4c32c4d56..9f7570f9bc 100644 --- a/k8s/device-registry/values-prod.yaml +++ b/k8s/device-registry/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-device-registry-api - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 nameOverride: '' fullnameOverride: '' podAnnotations: {} From 3640ff8f6397d4fe1e93db09abc7cde11e6fa7ff Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:23:17 +0300 Subject: [PATCH 10/13] Update website production image tag to prod-ae2e15a3-1733844050 --- k8s/website/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/website/values-prod.yaml b/k8s/website/values-prod.yaml index 6460bba388..342d8be4aa 100644 --- a/k8s/website/values-prod.yaml +++ b/k8s/website/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-website-api - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 nameOverride: '' fullnameOverride: '' podAnnotations: {} From fedfa5e7428aa99b7ad59a9cf0149323527a202c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:23:42 +0300 Subject: [PATCH 11/13] Update workflows prod image tag to prod-ae2e15a3-1733844050 --- k8s/workflows/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/workflows/values-prod.yaml b/k8s/workflows/values-prod.yaml index 723152603a..d0549ff120 100644 --- a/k8s/workflows/values-prod.yaml +++ b/k8s/workflows/values-prod.yaml @@ -10,7 +10,7 @@ images: initContainer: eu.gcr.io/airqo-250220/airqo-workflows-xcom redisContainer: eu.gcr.io/airqo-250220/airqo-redis containers: eu.gcr.io/airqo-250220/airqo-workflows - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 nameOverride: '' fullnameOverride: '' podAnnotations: {} From f6100bad0b46cc1c3571800b141c57c2e4891878 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:24:51 +0300 Subject: [PATCH 12/13] Update predict production image tag to prod-ae2e15a3-1733844050 --- k8s/predict/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/predict/values-prod.yaml b/k8s/predict/values-prod.yaml index 70cf680436..18ec2275a1 100644 --- a/k8s/predict/values-prod.yaml +++ b/k8s/predict/values-prod.yaml @@ -7,7 +7,7 @@ images: predictJob: eu.gcr.io/airqo-250220/airqo-predict-job trainJob: eu.gcr.io/airqo-250220/airqo-train-job predictPlaces: eu.gcr.io/airqo-250220/airqo-predict-places-air-quality - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 api: name: airqo-prediction-api label: prediction-api From ec1ff31aff44e64d2a541b5ef3f63c5f29032c74 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:29:32 +0300 Subject: [PATCH 13/13] Update spatial production image tag to prod-ae2e15a3-1733844050 --- k8s/spatial/values-prod.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/spatial/values-prod.yaml b/k8s/spatial/values-prod.yaml index 3e448286e8..94644d2a35 100644 --- a/k8s/spatial/values-prod.yaml +++ b/k8s/spatial/values-prod.yaml @@ -6,7 +6,7 @@ app: replicaCount: 3 image: repository: eu.gcr.io/airqo-250220/airqo-spatial-api - tag: prod-ee15b958-1733833086 + tag: prod-ae2e15a3-1733844050 nameOverride: '' fullnameOverride: '' podAnnotations: {}