Skip to content

Commit

Permalink
Merge pull request #4039 from airqo-platform/staging
Browse files Browse the repository at this point in the history
move to production
  • Loading branch information
Baalmart authored Dec 10, 2024
2 parents ae2e15a + dd764c2 commit 5e45c95
Show file tree
Hide file tree
Showing 13 changed files with 91 additions and 32 deletions.
2 changes: 1 addition & 1 deletion k8s/device-registry/values-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ app:
replicaCount: 3
image:
repository: eu.gcr.io/airqo-250220/airqo-device-registry-api
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
nameOverride: ''
fullnameOverride: ''
podAnnotations: {}
Expand Down
2 changes: 1 addition & 1 deletion k8s/device-registry/values-stage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ app:
replicaCount: 2
image:
repository: eu.gcr.io/airqo-250220/airqo-stage-device-registry-api
tag: stage-dfe6eb16-1733832983
tag: stage-c1e369be-1733844028
nameOverride: ''
fullnameOverride: ''
podAnnotations: {}
Expand Down
2 changes: 1 addition & 1 deletion k8s/exceedance/values-prod-airqo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ app:
configmap: env-exceedance-production
image:
repository: eu.gcr.io/airqo-250220/airqo-exceedance-job
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
nameOverride: ''
fullnameOverride: ''
2 changes: 1 addition & 1 deletion k8s/exceedance/values-prod-kcca.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ app:
configmap: env-exceedance-production
image:
repository: eu.gcr.io/airqo-250220/kcca-exceedance-job
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
nameOverride: ''
fullnameOverride: ''
2 changes: 1 addition & 1 deletion k8s/predict/values-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ images:
predictJob: eu.gcr.io/airqo-250220/airqo-predict-job
trainJob: eu.gcr.io/airqo-250220/airqo-train-job
predictPlaces: eu.gcr.io/airqo-250220/airqo-predict-places-air-quality
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
api:
name: airqo-prediction-api
label: prediction-api
Expand Down
2 changes: 1 addition & 1 deletion k8s/spatial/values-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ app:
replicaCount: 3
image:
repository: eu.gcr.io/airqo-250220/airqo-spatial-api
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
nameOverride: ''
fullnameOverride: ''
podAnnotations: {}
Expand Down
2 changes: 1 addition & 1 deletion k8s/website/values-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ app:
replicaCount: 3
image:
repository: eu.gcr.io/airqo-250220/airqo-website-api
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
nameOverride: ''
fullnameOverride: ''
podAnnotations: {}
Expand Down
2 changes: 1 addition & 1 deletion k8s/workflows/values-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ images:
initContainer: eu.gcr.io/airqo-250220/airqo-workflows-xcom
redisContainer: eu.gcr.io/airqo-250220/airqo-redis
containers: eu.gcr.io/airqo-250220/airqo-workflows
tag: prod-ee15b958-1733833086
tag: prod-ae2e15a3-1733844050
nameOverride: ''
fullnameOverride: ''
podAnnotations: {}
Expand Down
61 changes: 52 additions & 9 deletions src/analytics/api/models/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def download_from_bigquery(
frequency (str): Data frequency (e.g., 'raw', 'daily', 'hourly').
pollutants (list): List of pollutants to include in the data.
data_type (str): Type of data ('raw' or 'aggregated').
filter_columns(list)
weather_fields (list): List of weather fields to retrieve.
Returns:
Expand All @@ -438,14 +439,11 @@ def download_from_bigquery(
weather_columns = []
for pollutant in pollutants:

if pollutant == "raw":
key = pollutant
else:
key = f"{pollutant}_{data_type}"

key = f"{pollutant}_{data_type}"
pollutant_mapping = BIGQUERY_FREQUENCY_MAPPER.get(frequency, {}).get(
key, []
)

pollutant_columns.extend(
cls.get_columns(
cls,
Expand All @@ -459,6 +457,10 @@ def download_from_bigquery(

# TODO Clean up by use using `get_columns` helper method
if pollutant in {"pm2_5", "pm10", "no2"}:
if data_type == "raw":
# Add dummy column to fix union column number missmatch.
bam_pollutant_columns.append("-1 as pm2_5")

if frequency in ["weekly", "monthly", "yearly"]:
bam_pollutant_columns.extend(
[f"ROUND(AVG({pollutant}), {decimal_places}) AS {key}_value"]
Expand All @@ -467,6 +469,7 @@ def download_from_bigquery(
bam_pollutant_columns.extend(
[f"ROUND({pollutant}, {decimal_places}) AS {key}_value"]
)

# TODO Fix query when weather data is included. Currently failing
if weather_fields:
for field in weather_fields:
Expand Down Expand Up @@ -536,12 +539,55 @@ def download_from_bigquery(
drop_columns.append("datetime")
sorting_cols.append("datetime")

if data_type == "raw":
cls.simple_data_cleaning(dataframe)

dataframe.drop_duplicates(subset=drop_columns, inplace=True, keep="first")
dataframe.sort_values(sorting_cols, ascending=True, inplace=True)
dataframe["frequency"] = frequency
dataframe = dataframe.replace(np.nan, None)
return dataframe

@classmethod
def simple_data_cleaning(cls, data: pd.DataFrame) -> pd.DataFrame:
"""
Perform data cleaning on a pandas DataFrame to handle specific conditions
related to "pm2_5" and "pm2_5_raw_value" columns.
The cleaning process includes:
1. Ensuring correct numeric data types for "pm2_5" and "pm2_5_raw_value".
2. Removing "pm2_5" values where "pm2_5_raw_value" has data.
3. Dropping the "pm2_5_raw_value" column if it has no data at all.
4. Retaining "pm2_5" values where "pm2_5_raw_value" has no data, and removing
"pm2_5" values where "pm2_5_raw_value" has data.
5. Dropping any column (including "pm2_5" and "pm2_5_raw_value") if it is
entirely empty.
Args:
cls: Class reference (used in classmethods).
data (pd.DataFrame): Input pandas DataFrame with "pm2_5" and
"pm2_5_raw_value" columns.
Returns:
pd.DataFrame: Cleaned DataFrame with updates applied in place.
"""
data["pm2_5_raw_value"] = pd.to_numeric(
data["pm2_5_raw_value"], errors="coerce"
)
data["pm2_5"] = pd.to_numeric(data["pm2_5"], errors="coerce")

data.loc[~data["pm2_5_raw_value"].isna(), "pm2_5"] = np.nan

if data["pm2_5_raw_value"].isna().all():
data.drop(columns=["pm2_5_raw_value"], inplace=True)

data["pm2_5"] = data["pm2_5"].where(data["pm2_5_raw_value"].isna(), np.nan)

data.dropna(how="all", axis=1, inplace=True)

return data

@classmethod
def data_export_query(
cls,
Expand Down Expand Up @@ -1273,9 +1319,7 @@ def get_d3_chart_events(self, sites, start_date, end_date, pollutant, frequency)
)

@cache.memoize()
def get_d3_chart_events_v2(
self, sites, start_date, end_date, pollutant, frequency, tenant
):
def get_d3_chart_events_v2(self, sites, start_date, end_date, pollutant, frequency):
if pollutant not in ["pm2_5", "pm10", "no2", "pm1"]:
raise Exception("Invalid pollutant")

Expand All @@ -1293,7 +1337,6 @@ def get_d3_chart_events_v2(
JOIN {self.BIGQUERY_SITES} ON {self.BIGQUERY_SITES}.id = {self.BIGQUERY_EVENTS}.site_id
WHERE {self.BIGQUERY_EVENTS}.timestamp >= '{start_date}'
AND {self.BIGQUERY_EVENTS}.timestamp <= '{end_date}'
AND {self.BIGQUERY_EVENTS}.tenant = '{tenant}'
AND `airqo-250220.metadata.sites`.id in UNNEST({sites})
"""

Expand Down
25 changes: 25 additions & 0 deletions src/analytics/api/utils/data_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,31 @@ def filter_non_private_sites(filter_type: str, sites: List[str]) -> Dict[str, An
logger.exception(f"Error while filtering non private devices {rex}")


def validate_network(network_name: str) -> bool:
"""
Validate if a given network name exists in the list of networks.
Args:
network_name (str): The name of the network to validate.
Returns:
bool: True if the network name exists, False otherwise.
"""
if not network_name:
return False

endpoint: str = "/users/networks"
airqo_requests = AirQoRequests()
response = airqo_requests.request(endpoint=endpoint, method="get")

if response and "networks" in response:
networks = response["networks"]
# TODO Could add an active network filter
return any(network.get("net_name") == network_name for network in networks)

return False


def filter_non_private_devices(filter_type: str, devices: List[str]) -> Dict[str, Any]:
"""
FilterS out private device IDs from a provided array of device IDs.
Expand Down
9 changes: 2 additions & 7 deletions src/analytics/api/utils/pollutants/pm_25.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,14 @@

COMMON_POLLUTANT_MAPPING = {
"pm2_5_calibrated": ["pm2_5_calibrated_value"],
"pm2_5_raw": ["pm2_5_raw_value"],
"pm2_5_raw": ["pm2_5_raw_value", "pm2_5"],
"pm10_calibrated": ["pm10_calibrated_value"],
"pm10_raw": ["pm10_raw_value"],
"pm10_raw": ["pm10_raw_value", "pm10"],
"no2_calibrated": ["no2_calibrated_value"],
"no2_raw": ["no2_raw_value"],
}

BIGQUERY_FREQUENCY_MAPPER = {
"raw": {
"pm2_5": ["pm2_5", "s1_pm2_5", "s2_pm2_5"],
"pm10": ["pm10", "s1_pm10", "s2_pm10"],
"no2": ["no2"],
},
"daily": COMMON_POLLUTANT_MAPPING,
"hourly": COMMON_POLLUTANT_MAPPING,
"weekly": COMMON_POLLUTANT_MAPPING,
Expand Down
11 changes: 3 additions & 8 deletions src/analytics/api/views/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ def _get_validated_filter(self, json_data):
return filter_type, validated_data, error_message

def post(self):
tenant = request.args.get("tenant", "airqo")

json_data = request.get_json()

Expand All @@ -219,20 +218,16 @@ def post(self):
except Exception as e:
logger.exception(f"An error has occured; {e}")

sites = filter_non_private_sites("sites", json_data.get("sites", {})).get(
"sites", []
)

start_date = json_data["startDate"]
end_date = json_data["endDate"]
frequency = json_data["frequency"]
pollutant = json_data["pollutant"]
chart_type = json_data["chartType"]

events_model = EventsModel(tenant)
# data = events_model.get_d3_chart_events(sites, start_date, end_date, pollutant, frequency)
events_model = EventsModel("airqo")

data = events_model.get_d3_chart_events_v2(
filter_value, start_date, end_date, pollutant, frequency, tenant
filter_value, start_date, end_date, pollutant, frequency
)

if chart_type.lower() == "pie":
Expand Down
1 change: 1 addition & 0 deletions src/analytics/api/views/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def post(self):
data_frame.drop(
columns=[
"site_id",
"timestamp",
],
inplace=True,
)
Expand Down

0 comments on commit 5e45c95

Please sign in to comment.