From f8ee1cf51c44dd3adceff7020d8e2f2e938330ae Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 12:44:18 +0300 Subject: [PATCH 1/3] Add networks to schemas --- .../schema/airqo_mobile_measurements.json | 319 +++++---- .../schema/bam_measurements.json | 7 +- .../schema/data_warehouse.json | 669 +++++++++--------- .../airqo_etl_utils/schema/devices.json | 7 +- .../schema/latest_measurements.json | 479 ++++++------- .../schema/mobile_measurements.json | 7 +- 6 files changed, 759 insertions(+), 729 deletions(-) diff --git a/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json b/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json index d0fad4bfb2..b3a4ea0a30 100644 --- a/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/airqo_mobile_measurements.json @@ -1,158 +1,163 @@ [ - { - "name": "tenant", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "timestamp", - "type": "TIMESTAMP", - "mode": "NULLABLE" - }, - { - "name": "device_number", - "type": "INTEGER", - "mode": "NULLABLE" - }, - { - "name": "device_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "s2_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "s1_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "s2_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "altitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "battery", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "satellites", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "hdop", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_temperature", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_humidity", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm2_5_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "pm10_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "pm2_5_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "pm10_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3." - }, - { - "name": "temperature", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "°C." - }, - { - "name": "humidity", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "%." - }, - { - "name": "wind_speed", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "atmospheric_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "radiation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "W/m2." - }, - { - "name": "wind_gusts", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "precipitation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "mm." - }, - { - "name": "wind_direction", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "degrees" - } -] \ No newline at end of file + { + "name": "tenant", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "NULLABLE" + }, + { + "name": "device_number", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "device_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "s2_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "s1_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "s2_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "altitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "battery", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "satellites", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "hdop", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_temperature", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_humidity", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm2_5_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "pm10_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "pm2_5_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "pm10_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3." + }, + { + "name": "temperature", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "°C." + }, + { + "name": "humidity", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "%." + }, + { + "name": "wind_speed", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "atmospheric_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "radiation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "W/m2." + }, + { + "name": "wind_gusts", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "precipitation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "mm." + }, + { + "name": "wind_direction", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "degrees" + } +] diff --git a/src/workflows/airqo_etl_utils/schema/bam_measurements.json b/src/workflows/airqo_etl_utils/schema/bam_measurements.json index dbc9bb129e..9edb357713 100644 --- a/src/workflows/airqo_etl_utils/schema/bam_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/bam_measurements.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "NULLABLE" }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, { "name": "site_id", "type": "STRING", @@ -58,4 +63,4 @@ "mode": "NULLABLE", "description": "μg/m3." } -] \ No newline at end of file +] diff --git a/src/workflows/airqo_etl_utils/schema/data_warehouse.json b/src/workflows/airqo_etl_utils/schema/data_warehouse.json index cdbcf031f8..48290d3fc7 100644 --- a/src/workflows/airqo_etl_utils/schema/data_warehouse.json +++ b/src/workflows/airqo_etl_utils/schema/data_warehouse.json @@ -1,333 +1,338 @@ [ - { - "name": "tenant", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "timestamp", - "type": "TIMESTAMP", - "mode": "NULLABLE" - }, - { - "name": "site_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_description", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_altitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_number", - "type": "INTEGER", - "mode": "NULLABLE" - }, - { - "name": "device_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_category", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_battery", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_altitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_temperature", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_humidity", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm2_5", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor one" - }, - { - "name": "s2_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor two" - }, - { - "name": "pm2_5_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" - }, - { - "name": "pm2_5_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 2.5 value." - }, - { - "name": "pm10", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor one" - }, - { - "name": "s2_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor two" - }, - { - "name": "pm10_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." - }, - { - "name": "pm10_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 10 value. " - }, - { - "name": "no2", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "no2_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nNO2 Concentration raw value" - }, - { - "name": "no2_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." - }, - { - "name": "pm1", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm1_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 raw value. " - }, - { - "name": "pm1_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 calibrated value. " - }, - { - "name": "temperature", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "°C." - }, - { - "name": "humidity", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "%." - }, - { - "name": "wind_speed", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "atmospheric_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "radiation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "W/m2." - }, - { - "name": "vapor_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "wind_gusts", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "precipitation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "mm." - }, - { - "name": "wind_direction", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "degrees" - }, - { - "name": "satellites", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "hdop", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_bearing_to_kampala_center", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_kampala_center", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_landform_90", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_landform_270", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_aspect", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_tertiary_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_primary_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_residential_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_secondary_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_distance_to_nearest_unclassified_road", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "region", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "district", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "city", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "county", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "sub_county", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "parish", - "type": "STRING", - "mode": "NULLABLE" - } -] \ No newline at end of file + { + "name": "tenant", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "NULLABLE" + }, + { + "name": "site_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_description", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_altitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_number", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "device_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_category", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_battery", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_altitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_temperature", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_humidity", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm2_5", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor one" + }, + { + "name": "s2_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor two" + }, + { + "name": "pm2_5_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" + }, + { + "name": "pm2_5_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 2.5 value." + }, + { + "name": "pm10", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor one" + }, + { + "name": "s2_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor two" + }, + { + "name": "pm10_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." + }, + { + "name": "pm10_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 10 value. " + }, + { + "name": "no2", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "no2_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nNO2 Concentration raw value" + }, + { + "name": "no2_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." + }, + { + "name": "pm1", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm1_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 raw value. " + }, + { + "name": "pm1_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 calibrated value. " + }, + { + "name": "temperature", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "°C." + }, + { + "name": "humidity", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "%." + }, + { + "name": "wind_speed", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "atmospheric_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "radiation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "W/m2." + }, + { + "name": "vapor_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "wind_gusts", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "precipitation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "mm." + }, + { + "name": "wind_direction", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "degrees" + }, + { + "name": "satellites", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "hdop", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_bearing_to_kampala_center", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_kampala_center", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_landform_90", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_landform_270", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_aspect", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_tertiary_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_primary_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_residential_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_secondary_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_distance_to_nearest_unclassified_road", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "country", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "region", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "district", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "city", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "county", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "sub_county", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "parish", + "type": "STRING", + "mode": "NULLABLE" + } +] diff --git a/src/workflows/airqo_etl_utils/schema/devices.json b/src/workflows/airqo_etl_utils/schema/devices.json index d27384cb32..966b69122e 100644 --- a/src/workflows/airqo_etl_utils/schema/devices.json +++ b/src/workflows/airqo_etl_utils/schema/devices.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "REQUIRED" }, + { + "name": "network", + "type": "STRING", + "mode": "REQUIRED" + }, { "name": "latitude", "type": "FLOAT", @@ -59,4 +64,4 @@ "type": "STRING", "mode": "NULLABLE" } -] \ No newline at end of file +] diff --git a/src/workflows/airqo_etl_utils/schema/latest_measurements.json b/src/workflows/airqo_etl_utils/schema/latest_measurements.json index 72f4cffc4f..34c50539e0 100644 --- a/src/workflows/airqo_etl_utils/schema/latest_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/latest_measurements.json @@ -1,238 +1,243 @@ [ - { - "name": "tenant", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "timestamp", - "type": "TIMESTAMP", - "mode": "NULLABLE" - }, - { - "name": "site_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_location", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_display_name", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_display_location", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "site_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_approximate_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "site_approximate_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_number", - "type": "INTEGER", - "mode": "NULLABLE" - }, - { - "name": "device_id", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_category", - "type": "STRING", - "mode": "NULLABLE" - }, - { - "name": "device_latitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "device_longitude", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm2_5", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor one" - }, - { - "name": "s2_pm2_5", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 2.5 of sensor two" - }, - { - "name": "pm2_5_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" - }, - { - "name": "pm2_5_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 2.5 value." - }, - { - "name": "pm10", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "s1_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor one" - }, - { - "name": "s2_pm10", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 10 of sensor two" - }, - { - "name": "pm10_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." - }, - { - "name": "pm10_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\nEquals the calibrated pm 10 value." - }, - { - "name": "no2", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "no2_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nNO2 Concentration raw value" - }, - { - "name": "no2_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." - }, - { - "name": "pm1", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "pm1_raw_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 raw value." - }, - { - "name": "pm1_calibrated_value", - "type": "FLOAT", - "mode": "NULLABLE", - "description": " μg/m3.\npm 1 calibrated value." - }, - { - "name": "temperature", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "°C." - }, - { - "name": "humidity", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "%." - }, - { - "name": "wind_speed", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "atmospheric_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "radiation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "W/m2." - }, - { - "name": "vapor_pressure", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "kPa." - }, - { - "name": "wind_gusts", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "m/s." - }, - { - "name": "precipitation", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "mm." - }, - { - "name": "wind_direction", - "type": "FLOAT", - "mode": "NULLABLE", - "description": "degrees" - }, - { - "name": "satellites", - "type": "FLOAT", - "mode": "NULLABLE" - }, - { - "name": "hdop", - "type": "FLOAT", - "mode": "NULLABLE" - } -] \ No newline at end of file + { + "name": "tenant", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "timestamp", + "type": "TIMESTAMP", + "mode": "NULLABLE" + }, + { + "name": "site_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_location", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_display_name", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_display_location", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "site_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_approximate_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "site_approximate_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_number", + "type": "INTEGER", + "mode": "NULLABLE" + }, + { + "name": "device_id", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_category", + "type": "STRING", + "mode": "NULLABLE" + }, + { + "name": "device_latitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "device_longitude", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm2_5", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor one" + }, + { + "name": "s2_pm2_5", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 2.5 of sensor two" + }, + { + "name": "pm2_5_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 2.5 average value of sensor one and sensor two" + }, + { + "name": "pm2_5_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 2.5 value." + }, + { + "name": "pm10", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "s1_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor one" + }, + { + "name": "s2_pm10", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 10 of sensor two" + }, + { + "name": "pm10_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the pm 10 average value of sensor one and sensor two." + }, + { + "name": "pm10_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\nEquals the calibrated pm 10 value." + }, + { + "name": "no2", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "no2_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nNO2 Concentration raw value" + }, + { + "name": "no2_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "ppb.\nEquals the NO2 Concentration value if available else equals the raw value." + }, + { + "name": "pm1", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "pm1_raw_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 raw value." + }, + { + "name": "pm1_calibrated_value", + "type": "FLOAT", + "mode": "NULLABLE", + "description": " μg/m3.\npm 1 calibrated value." + }, + { + "name": "temperature", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "°C." + }, + { + "name": "humidity", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "%." + }, + { + "name": "wind_speed", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "atmospheric_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "radiation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "W/m2." + }, + { + "name": "vapor_pressure", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "kPa." + }, + { + "name": "wind_gusts", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "m/s." + }, + { + "name": "precipitation", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "mm." + }, + { + "name": "wind_direction", + "type": "FLOAT", + "mode": "NULLABLE", + "description": "degrees" + }, + { + "name": "satellites", + "type": "FLOAT", + "mode": "NULLABLE" + }, + { + "name": "hdop", + "type": "FLOAT", + "mode": "NULLABLE" + } +] diff --git a/src/workflows/airqo_etl_utils/schema/mobile_measurements.json b/src/workflows/airqo_etl_utils/schema/mobile_measurements.json index 3934262811..1f7b1b5a11 100644 --- a/src/workflows/airqo_etl_utils/schema/mobile_measurements.json +++ b/src/workflows/airqo_etl_utils/schema/mobile_measurements.json @@ -4,6 +4,11 @@ "type": "STRING", "mode": "NULLABLE" }, + { + "name": "network", + "type": "STRING", + "mode": "NULLABLE" + }, { "name": "timestamp", "type": "TIMESTAMP", @@ -129,4 +134,4 @@ "type": "FLOAT", "mode": "NULLABLE" } -] \ No newline at end of file +] From 647c05a94167276a2f215bc6e35bc44ea59b72fc Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 14:19:51 +0300 Subject: [PATCH 2/3] Clean up to remove repeated operations --- src/workflows/airqo_etl_utils/airqo_utils.py | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/workflows/airqo_etl_utils/airqo_utils.py b/src/workflows/airqo_etl_utils/airqo_utils.py index 71e519ec80..51d14f2721 100644 --- a/src/workflows/airqo_etl_utils/airqo_utils.py +++ b/src/workflows/airqo_etl_utils/airqo_utils.py @@ -758,19 +758,26 @@ def clean_low_cost_sensor_data( AirQoGxExpectations.from_pandas().pm2_5_low_cost_sensor_raw_data( data ) - + else: + data["timestamp"] = pd.to_datetime(data["timestamp"]) data.dropna(subset=["timestamp"], inplace=True) - data["timestamp"] = pd.to_datetime(data["timestamp"]) + data.drop_duplicates( subset=["timestamp", "device_id"], keep="first", inplace=True ) # TODO Find an appropriate place to put this if device_category == DeviceCategory.LOW_COST: - data["pm2_5_raw_value"] = data[["s1_pm2_5", "s2_pm2_5"]].mean(axis=1) - data["pm2_5"] = data[["s1_pm2_5", "s2_pm2_5"]].mean(axis=1) - data["pm10_raw_value"] = data[["s1_pm10", "s2_pm10"]].mean(axis=1) - data["pm10"] = data[["s1_pm10", "s2_pm10"]].mean(axis=1) + is_airqo_network = data["network"] == "airqo" + + pm2_5_mean = data.loc[is_airqo_network, ["s1_pm2_5", "s2_pm2_5"]].mean( + axis=1 + ) + pm10_mean = data.loc[is_airqo_network, ["s1_pm10", "s2_pm10"]].mean(axis=1) + data.loc[is_airqo_network, "pm2_5_raw_value"] = pm2_5_mean + data.loc[is_airqo_network, "pm2_5"] = pm2_5_mean + data.loc[is_airqo_network, "pm10_raw_value"] = pm10_mean + data.loc[is_airqo_network, "pm10"] = pm10_mean return data @staticmethod @@ -1033,7 +1040,7 @@ def merge_aggregated_weather_data( @staticmethod def extract_devices_deployment_logs() -> pd.DataFrame: airqo_api = AirQoApi() - devices = airqo_api.get_devices(tenant=Tenant.AIRQO) + devices = airqo_api.get_devices(network=str(Tenant.AIRQO)) devices_history = pd.DataFrame() for device in devices: try: From ba74197e89e521c39b92bc88919212062336eab7 Mon Sep 17 00:00:00 2001 From: NicholasTurner23 Date: Wed, 4 Dec 2024 14:20:46 +0300 Subject: [PATCH 3/3] Cleanup datetime conversion --- src/workflows/airqo_etl_utils/data_validator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/workflows/airqo_etl_utils/data_validator.py b/src/workflows/airqo_etl_utils/data_validator.py index 34a25ba59f..13527673be 100644 --- a/src/workflows/airqo_etl_utils/data_validator.py +++ b/src/workflows/airqo_etl_utils/data_validator.py @@ -72,10 +72,10 @@ def format_data_types( data[col] = ( data[col] .astype(str) - .str.replace(r"[^\w\s\.\-:]", "", regex=True) + .str.replace(r"[^\w\s\.\-+:]", "", regex=True) .str.replace(r"(? pd.DataFrame: dtype: list(set(columns) & set(data.columns)) for dtype, columns in column_types.items() } - data = DataValidationUtils.format_data_types( data=data, floats=filtered_columns[ColumnDataType.FLOAT], @@ -151,7 +150,6 @@ def remove_outliers(data: pd.DataFrame) -> pd.DataFrame: ) validated_columns = list(chain.from_iterable(filtered_columns.values())) - for col in validated_columns: is_airqo_network = data["network"] == "airqo" mapped_name = configuration.AIRQO_DATA_COLUMN_NAME_MAPPING.get(col, None)