From 536fcd3a591633daf6c9d287f449a872608da94c Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 14:31:35 +0300 Subject: [PATCH 1/5] Update dashboard chart query --- src/analytics/api/models/events.py | 6 ++---- src/analytics/api/utils/data_formatters.py | 25 ++++++++++++++++++++++ src/analytics/api/views/dashboard.py | 11 +++------- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/analytics/api/models/events.py b/src/analytics/api/models/events.py index 4ac9e2febb..c17c32fa21 100644 --- a/src/analytics/api/models/events.py +++ b/src/analytics/api/models/events.py @@ -413,6 +413,7 @@ def download_from_bigquery( frequency (str): Data frequency (e.g., 'raw', 'daily', 'hourly'). pollutants (list): List of pollutants to include in the data. data_type (str): Type of data ('raw' or 'aggregated'). + filter_columns(list) weather_fields (list): List of weather fields to retrieve. Returns: @@ -1273,9 +1274,7 @@ def get_d3_chart_events(self, sites, start_date, end_date, pollutant, frequency) ) @cache.memoize() - def get_d3_chart_events_v2( - self, sites, start_date, end_date, pollutant, frequency, tenant - ): + def get_d3_chart_events_v2(self, sites, start_date, end_date, pollutant, frequency): if pollutant not in ["pm2_5", "pm10", "no2", "pm1"]: raise Exception("Invalid pollutant") @@ -1293,7 +1292,6 @@ def get_d3_chart_events_v2( JOIN {self.BIGQUERY_SITES} ON {self.BIGQUERY_SITES}.id = {self.BIGQUERY_EVENTS}.site_id WHERE {self.BIGQUERY_EVENTS}.timestamp >= '{start_date}' AND {self.BIGQUERY_EVENTS}.timestamp <= '{end_date}' - AND {self.BIGQUERY_EVENTS}.tenant = '{tenant}' AND `airqo-250220.metadata.sites`.id in UNNEST({sites}) """ diff --git a/src/analytics/api/utils/data_formatters.py b/src/analytics/api/utils/data_formatters.py index bace9559de..0c6fb6b377 100644 --- a/src/analytics/api/utils/data_formatters.py +++ b/src/analytics/api/utils/data_formatters.py @@ -328,6 +328,31 @@ def filter_non_private_sites(filter_type: str, sites: 
List[str]) -> Dict[str, An logger.exception(f"Error while filtering non private devices {rex}") +def validate_network(self, network_name: str) -> bool: + """ + Validate if a given network name exists in the list of networks. + + Args: + network_name (str): The name of the network to validate. + + Returns: + bool: True if the network name exists, False otherwise. + """ + if not network_name: + return False + + endpoint: str = "/users/networks" + airqo_requests = AirQoRequests() + response = airqo_requests.request(endpoint=endpoint, method="get") + + if response and "networks" in response: + networks = response["networks"] + # TODO Could add an active network filter + return any(network.get("net_name") == network_name for network in networks) + + return False + + def filter_non_private_devices(filter_type: str, devices: List[str]) -> Dict[str, Any]: """ FilterS out private device IDs from a provided array of device IDs. diff --git a/src/analytics/api/views/dashboard.py b/src/analytics/api/views/dashboard.py index 62d31bc01e..3e4429495e 100644 --- a/src/analytics/api/views/dashboard.py +++ b/src/analytics/api/views/dashboard.py @@ -206,7 +206,6 @@ def _get_validated_filter(self, json_data): return filter_type, validated_data, error_message def post(self): - tenant = request.args.get("tenant", "airqo") json_data = request.get_json() @@ -219,20 +218,16 @@ def post(self): except Exception as e: logger.exception(f"An error has occured; {e}") - sites = filter_non_private_sites("sites", json_data.get("sites", {})).get( - "sites", [] - ) - start_date = json_data["startDate"] end_date = json_data["endDate"] frequency = json_data["frequency"] pollutant = json_data["pollutant"] chart_type = json_data["chartType"] - events_model = EventsModel(tenant) - # data = events_model.get_d3_chart_events(sites, start_date, end_date, pollutant, frequency) + events_model = EventsModel("airqo") + data = events_model.get_d3_chart_events_v2( - filter_value, start_date, end_date, pollutant, 
frequency, tenant + filter_value, start_date, end_date, pollutant, frequency ) if chart_type.lower() == "pie": From 370f4583d4297e2b2e395cc3bb5fc870dfb1e50a Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 14:43:14 +0300 Subject: [PATCH 2/5] Drop timestamp column --- src/analytics/api/views/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/analytics/api/views/data.py b/src/analytics/api/views/data.py index 6038e80497..07ef72c0a2 100644 --- a/src/analytics/api/views/data.py +++ b/src/analytics/api/views/data.py @@ -166,6 +166,7 @@ def post(self): data_frame.drop( columns=[ "site_id", + "timestamp", ], inplace=True, ) From 368c59d9af60fbf432b807b668c9d82d491f3bd8 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 17:22:49 +0300 Subject: [PATCH 3/5] Handle non calibrated values --- src/analytics/api/models/events.py | 55 +++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/src/analytics/api/models/events.py b/src/analytics/api/models/events.py index c17c32fa21..a3883752f2 100644 --- a/src/analytics/api/models/events.py +++ b/src/analytics/api/models/events.py @@ -439,14 +439,11 @@ def download_from_bigquery( weather_columns = [] for pollutant in pollutants: - if pollutant == "raw": - key = pollutant - else: - key = f"{pollutant}_{data_type}" - + key = f"{pollutant}_{data_type}" pollutant_mapping = BIGQUERY_FREQUENCY_MAPPER.get(frequency, {}).get( key, [] ) + pollutant_columns.extend( cls.get_columns( cls, @@ -460,6 +457,10 @@ def download_from_bigquery( # TODO Clean up by use using `get_columns` helper method if pollutant in {"pm2_5", "pm10", "no2"}: + if data_type == "raw": + # Add dummy column to fix union column number mismatch. 
+ bam_pollutant_columns.append("-1 as pm2_5") + if frequency in ["weekly", "monthly", "yearly"]: bam_pollutant_columns.extend( [f"ROUND(AVG({pollutant}), {decimal_places}) AS {key}_value"] @@ -468,6 +469,7 @@ def download_from_bigquery( bam_pollutant_columns.extend( [f"ROUND({pollutant}, {decimal_places}) AS {key}_value"] ) + # TODO Fix query when weather data is included. Currently failing if weather_fields: for field in weather_fields: @@ -537,12 +539,55 @@ def download_from_bigquery( drop_columns.append("datetime") sorting_cols.append("datetime") + if data_type == "raw": + cls.simple_data_cleaning(dataframe) + dataframe.drop_duplicates(subset=drop_columns, inplace=True, keep="first") dataframe.sort_values(sorting_cols, ascending=True, inplace=True) dataframe["frequency"] = frequency dataframe = dataframe.replace(np.nan, None) return dataframe + @classmethod + def simple_data_cleaning(cls, data: pd.DataFrame) -> pd.DataFrame: + """ + Perform data cleaning on a pandas DataFrame to handle specific conditions + related to "pm2_5" and "pm2_5_raw_value" columns. + + The cleaning process includes: + 1. Ensuring correct numeric data types for "pm2_5" and "pm2_5_raw_value". + 2. Removing "pm2_5" values where "pm2_5_raw_value" has data. + 3. Dropping the "pm2_5_raw_value" column if it has no data at all. + 4. Retaining "pm2_5" values where "pm2_5_raw_value" has no data, and removing + "pm2_5" values where "pm2_5_raw_value" has data. + 5. Dropping any column (including "pm2_5" and "pm2_5_raw_value") if it is + entirely empty. + + Args: + cls: Class reference (used in classmethods). + data (pd.DataFrame): Input pandas DataFrame with "pm2_5" and + "pm2_5_raw_value" columns. + + Returns: + pd.DataFrame: Cleaned DataFrame with updates applied in place. 
+ + """ + data["pm2_5_raw_value"] = pd.to_numeric( + data["pm2_5_raw_value"], errors="coerce" + ) + data["pm2_5"] = pd.to_numeric(data["pm2_5"], errors="coerce") + + data.loc[~data["pm2_5_raw_value"].isna(), "pm2_5"] = np.nan + + if data["pm2_5_raw_value"].isna().all(): + data.drop(columns=["pm2_5_raw_value"], inplace=True) + + data["pm2_5"] = data["pm2_5"].where(data["pm2_5_raw_value"].isna(), np.nan) + + data.dropna(how="all", axis=1, inplace=True) + + return data + @classmethod def data_export_query( cls, From c1cf65941d5aebd63f1f769305b52df8eb6f4e78 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Tue, 10 Dec 2024 17:23:38 +0300 Subject: [PATCH 4/5] Clean up --- src/analytics/api/utils/pollutants/pm_25.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/analytics/api/utils/pollutants/pm_25.py b/src/analytics/api/utils/pollutants/pm_25.py index 6e3f2a986b..9c16abf988 100644 --- a/src/analytics/api/utils/pollutants/pm_25.py +++ b/src/analytics/api/utils/pollutants/pm_25.py @@ -56,19 +56,14 @@ COMMON_POLLUTANT_MAPPING = { "pm2_5_calibrated": ["pm2_5_calibrated_value"], - "pm2_5_raw": ["pm2_5_raw_value"], + "pm2_5_raw": ["pm2_5_raw_value", "pm2_5"], "pm10_calibrated": ["pm10_calibrated_value"], - "pm10_raw": ["pm10_raw_value"], + "pm10_raw": ["pm10_raw_value", "pm10"], "no2_calibrated": ["no2_calibrated_value"], "no2_raw": ["no2_raw_value"], } BIGQUERY_FREQUENCY_MAPPER = { - "raw": { - "pm2_5": ["pm2_5", "s1_pm2_5", "s2_pm2_5"], - "pm10": ["pm10", "s1_pm10", "s2_pm10"], - "no2": ["no2"], - }, "daily": COMMON_POLLUTANT_MAPPING, "hourly": COMMON_POLLUTANT_MAPPING, "weekly": COMMON_POLLUTANT_MAPPING, From 84577f29ddfb0d3144219c7886e3a1b5be8ae4fb Mon Sep 17 00:00:00 2001 From: Nicholas Bob Date: Tue, 10 Dec 2024 17:44:02 +0300 Subject: [PATCH 5/5] Update data_formatters.py Clean up --- src/analytics/api/utils/data_formatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analytics/api/utils/data_formatters.py 
b/src/analytics/api/utils/data_formatters.py index 0c6fb6b377..d1f5eda567 100644 --- a/src/analytics/api/utils/data_formatters.py +++ b/src/analytics/api/utils/data_formatters.py @@ -328,7 +328,7 @@ def filter_non_private_sites(filter_type: str, sites: List[str]) -> Dict[str, An logger.exception(f"Error while filtering non private devices {rex}") -def validate_network(self, network_name: str) -> bool: +def validate_network(network_name: str) -> bool: """ Validate if a given network name exists in the list of networks.