
auto reporting #3846

Merged
merged 17 commits on Nov 27, 2024
4 changes: 3 additions & 1 deletion src/spatial/configure.py
@@ -25,7 +25,9 @@ class Config:
        "BIGQUERY_SATELLITE_MODEL_PREDICTIONS"
    )

    HUGGING_FACE_TOKEN = os.getenv("HUGGING_FACE_TOKEN")
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

class ProductionConfig(Config):
    DEBUG = False
    TESTING = False
5 changes: 5 additions & 0 deletions src/spatial/controllers/controllers.py
@@ -12,6 +12,7 @@
from views.satellite_predictions import SatellitePredictionView
from views.site_category_view import SiteCategorizationView
from views.site_selection_views import SiteSelectionView
from views.report_view import ReportView


controller_bp = Blueprint("controller", __name__)
@@ -66,3 +67,7 @@ def site_selection():
@controller_bp.route("/satellite_prediction", methods=["POST"])
def get_satellite_prediction():
    return SatellitePredictionView.make_predictions()


@controller_bp.route("/air_quality_report", methods=["POST"])
def fetch_air_quality():
    return ReportView.generate_air_quality_report()
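For reference, a minimal client-side sketch of a request body for the new route. The grid_id value, time range, and base URL are illustrative assumptions, not values from this PR:

```python
import json

# Hypothetical request body for POST /air_quality_report; the grid_id and
# time range below are placeholders, not real values from this PR.
payload = {
    "grid_id": "example-grid-id",
    "start_time": "2024-01-01T00:00:00Z",
    "end_time": "2024-01-31T23:59:59Z",
    "audience": "general public",
}
body = json.dumps(payload)

# An actual client would send it with, e.g.:
# requests.post(f"{BASE_URL}/air_quality_report", json=payload)
```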
214 changes: 214 additions & 0 deletions src/spatial/models/report_datafetcher.py
@@ -0,0 +1,214 @@
import requests
import openai
from transformers import AutoModelForCausalLM, AutoTokenizer
🛠️ Refactor suggestion

Remove unused imports from the transformers library.

The AutoModelForCausalLM and AutoTokenizer imports are not used in the current implementation.

-from transformers import AutoModelForCausalLM, AutoTokenizer
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from transformers import AutoModelForCausalLM, AutoTokenizer
🧰 Tools
🪛 Ruff
3-3: transformers.AutoModelForCausalLM imported but unused; remove unused import (F401)
3-3: transformers.AutoTokenizer imported but unused; remove unused import (F401)

from huggingface_hub import login
from configure import Config
import google.generativeai as genai


# Configure API keys
GOOGLE_API_KEY = Config.GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)
hf_token = Config.HUGGING_FACE_TOKEN

if hf_token:
    login(hf_token)
else:
    print("Hugging Face token is missing. Set the 'HUGGING_FACE_TOKEN' environment variable.")

class DataFetcher:
    @staticmethod
    def fetch_air_quality_data_a(grid_id, start_time, end_time):
        token = Config.AIRQO_API_TOKEN
        if token is None:
            print("Error: AIRQO_API_TOKEN environment variable is not set.")
            return None

        url = f"https://platform.airqo.net/api/v2/analytics/grid/report?token={token}"
@wabinyai, is it possible to have this URL (or a section of it) as an environment variable? Thanks for the great work.

@Baalmart I am still doing some extra work on it.
The environment variables needed are HUGGING_FACE_TOKEN, GOOGLE_API_KEY, and OPENAI_API_KEY.
We are still exploring other free LLMs that can perform as well as Gemini.

        payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time}
⚠️ Potential issue

Security: Remove token from URL query parameters.

Including sensitive tokens in URLs is a security risk as they can be logged in server logs or exposed in browser history.

Apply this diff to move the token to headers:

-        url = f"https://platform.airqo.net/api/v2/analytics/grid/report?token={token}"
-        payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time}
+        url = "https://platform.airqo.net/api/v2/analytics/grid/report"
+        headers = {"Authorization": f"Bearer {token}"}
+        payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time}

Update the request to use headers:

-            response = requests.post(url, json=payload)
+            response = requests.post(url, headers=headers, json=payload)

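To make the suggestion concrete, here is a small hedged sketch of the header-based request construction. The helper name is invented for illustration; only the endpoint URL comes from the code under review, and whether the AirQo API accepts a Bearer header is an assumption the reviewer's diff also makes:

```python
API_URL = "https://platform.airqo.net/api/v2/analytics/grid/report"

def build_report_request(token, grid_id, start_time, end_time):
    # Carry the token in an Authorization header instead of the query
    # string, so it is not written to server logs along with the URL.
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time}
    return API_URL, headers, payload

# A caller would then post it, e.g.:
# url, headers, payload = build_report_request(token, grid_id, start, end)
# response = requests.post(url, headers=headers, json=payload, timeout=30)
```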


        try:
            response = requests.post(url, json=payload)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as req_err:
            print(f"Request error occurred: {req_err}")
        except ValueError as json_err:
            print(f"JSON decoding error: {json_err}")

        return None

class AirQualityReport:
    def __init__(self, data):
        self.data = data
        self.grid_name = data.get('airquality', {}).get('sites', {}).get('grid name', [None])
        self.annual_data = data.get('airquality', {}).get('annual_pm', [None])[0]
        self.daily_mean_data = data.get('airquality', {}).get('daily_mean_pm', [])
        self.diurnal = data.get('airquality', {}).get('diurnal', [])
        self.monthly_data = data.get('airquality', {}).get('site_monthly_mean_pm', [])
        self.monthly_name_data = data.get('airquality', {}).get('pm_by_month_name', [])
        main_site_info = self.monthly_data[0] if self.monthly_data else {}
        self.main_site = main_site_info.get('site_name')
        self.site_names = [item.get('site_name', None) for item in self.data.get('airquality', {}).get('site_annual_mean_pm', [])]
        self.site_latitude = main_site_info.get('site_latitude')
        self.site_longitude = main_site_info.get('site_longitude')
        self.num_sites = data.get('airquality', {}).get('sites', {}).get('number_of_sites')
        self.starttime = data.get('airquality', {}).get('period', {}).get('startTime', '')[:10]
        self.endtime = data.get('airquality', {}).get('period', {}).get('endTime', '')[:10]

        self.annual_pm2_5_calibrated_value = self.annual_data.get("pm2_5_calibrated_value")
        self.annual_pm10_calibrated_value = self.annual_data.get("pm10_calibrated_value")

        # Finding the minimum and maximum values
        self.daily_min_pm2_5 = min(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value'])
        self.daily_max_pm2_5 = max(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value'])

        # Initialize models once in the constructor
        self.gemini_model = genai.GenerativeModel('gemini-pro')
        openai.api_key = Config.OPENAI_API_KEY
Comment on lines +91 to +93

🛠️ Refactor suggestion

Add error handling for model initialization.

The model initialization could fail silently if API keys are invalid or if there are connection issues.

         # Initialize models once in the constructor
-        self.gemini_model = genai.GenerativeModel('gemini-pro')
-        openai.api_key = Config.OPENAI_API_KEY
+        try:
+            self.gemini_model = genai.GenerativeModel('gemini-pro')
+            openai.api_key = Config.OPENAI_API_KEY
+        except Exception as e:
+            logging.error(f"Failed to initialize AI models: {e}")
+            raise


    def _prepare_base_info(self):
        return (
            f"The air quality report is for {self.grid_name} for the period of {self.starttime} to {self.endtime}. "
            f"These air quality monitoring sites are {self.site_names} and measure PM2.5 and PM10, "
            f"at coordinates {self.site_latitude}°N, {self.site_longitude}°E. "
            f"The annual PM2.5 concentration averages {self.annual_data} µg/m³."
        )

    def _generate_prompt(self, audience):
        base_info = self._prepare_base_info()
        if audience == "researcher":
            return (
                f"{audience}"
🛠️ Refactor suggestion

Remove unnecessary inclusion of audience in the prompts.

In the _generate_prompt method, the lines f"{audience}" may unintentionally insert the audience type into the generated prompts. This could lead to confusing or redundant content in the reports.

Apply this diff to remove the redundant lines:

            if audience == "researcher":
-                return (
-                    f"{audience}"
                    f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with ..."
                    ...

            elif audience == "policymaker":
-                return (
-                    f"{audience}"
                    f"Create an executive summary of air quality conditions in {self.grid_name} for the period of {self.starttime} to {self.endtime} for policy decision-making. Begin with ..."
                    ...

            elif audience == "general public":
-                return (
-                    f"{audience}"
                    f"{base_info} include the period under review."
                    f"Create a clear, easy-to-understand report about air quality in {self.grid_name} for the period of {self.starttime} to {self.endtime}. Start with ..."
                    ...

Also applies to: 97-97, 107-107


                f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources. "
                f"{base_info} include the period under review."
                f"Daily mean measurements show: {self.daily_mean_data}. "
                f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. "
                f"Provide a thorough analysis of spatial and temporal air quality variations, identify pollution hotspots and clean zones, examine seasonal patterns, and assess compliance with WHO guidelines. "
                f"Conclude with actionable recommendations for air quality improvement and public health protection. Data source: AirQo monitoring network."
            )
        elif audience == "policymaker":
            return (
                f"{audience}"
                f"Create an executive summary of air quality conditions in {self.grid_name} for the period of {self.starttime} to {self.endtime} for policy decision-making. Begin with key findings and their policy implications (50-75 words). "
                f"{base_info} include the period under review."
                f"Highlight critical trends: {self.monthly_data}. Diurnal patterns indicate: {self.diurnal}. "
                f"Focus on: 1) Areas exceeding air quality standards, 2) Population exposure risk assessment, "
                f"3) Economic implications of poor air quality. Present clear, actionable policy recommendations with expected outcomes and implementation timeframes. "
                f"Include cost-benefit considerations and potential regulatory measures. Data source: AirQo monitoring network."
            )
        elif audience == "general public":
            return (
                f"{audience}"
                f"{base_info} include the period under review."
                f"Create a clear, easy-to-understand report about air quality in {self.grid_name} for the period of {self.starttime} to {self.endtime}. Start with a simple explanation of why air quality matters for public health. "
                f"We have {self.num_sites} air quality monitors in your area. The average PM2.5 level this year is {self.annual_data} µg/m³. "
                f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. "
                f"Explain what these numbers mean for daily activities. Include: 1) When air quality is best and worst during the day, "
                f"2) Which areas have better or worse air quality, 3) Simple steps people can take to protect their health, "
                f"4) How to access daily air quality updates. Use plain language and avoid technical terms. "
                f"Add practical tips for reducing exposure to air pollution. Data source: AirQo monitoring network."
            )
        else:
            raise ValueError("Invalid audience type. Please specify 'researcher', 'policymaker', or 'general public'.")

    def generate_report_with_gemini(self, audience):
        prompt = self._generate_prompt(audience)
        response = self.gemini_model.generate_content(prompt)
        gemini_output = response.text
        return self._prepare_report_json(gemini_output)
wabinyai marked this conversation as resolved.

    def generate_report_with_openai(self, audience):
        prompt = self._generate_prompt(audience)
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        openai_output = response.choices[0].message['content']
        return self._prepare_report_json(openai_output)

    # Use non-LLM template text as report content
    def generate_report_template_witout_LLM(self, audience):
🛠️ Refactor suggestion

Correct the typo in the method name generate_report_template_witout_LLM.

There's a typo in the method name generate_report_template_witout_LLM; it should be generate_report_template_without_LLM.

Apply this diff to fix the method name:

-        def generate_report_template_witout_LLM(self, audience):
+        def generate_report_template_without_LLM(self, audience):

Make sure to update any calls to this method accordingly.


        prompt = self._generate_prompt(audience)
        report_content = prompt
        return self._prepare_report_json(report_content)

    def generate_report_without_llm(self):
        # Determine peak time and least PM2.5 values
        peak_data = max(self.diurnal, key=lambda x: x['pm2_5_calibrated_value'])
        peak_time = peak_data['hour']
        peak_pm2_5 = peak_data['pm2_5_calibrated_value']

        least_data = min(self.diurnal, key=lambda x: x['pm2_5_calibrated_value'])
        least_pm2_5 = least_data['pm2_5_calibrated_value']
        least_pm2_5_time = least_data['hour']

        introduction = (
            f"The air quality report for {self.grid_name} covers the period from {self.starttime} to {self.endtime}. "
            f"The {self.num_sites} monitored sites include: {', '.join(self.site_names)}. "
            f"Measurements are taken for PM2.5 and PM10 concentrations. "
            f"The annual average PM2.5 concentration is {self.annual_pm2_5_calibrated_value} µg/m³."
        )

        diurnal_description = (
            f"Diurnal patterns observed include the following: {self.diurnal}. "
            f"These patterns provide insight into air quality fluctuations throughout the day. "
            f"The peak PM2.5 level of {peak_pm2_5} µg/m³ occurs around {peak_time}:00 hr, indicating a period of higher pollution, often associated with increased activity or traffic. "
            f"Conversely, the lowest PM2.5 level of {least_pm2_5} µg/m³ occurs around {least_pm2_5_time}:00 hr, "
            f"which usually represents a period of lower activity or better atmospheric dispersion. "
            f"Understanding the patterns of pollution and their impacts on public health is crucial for effective environmental management and policy-making. "
            f"Throughout this report, we will explore key trends in PM2.5 and PM10 concentrations, the diurnal variations, and the impact of these levels on air quality across the region."
        )

        daily_mean_description = (
            f"Daily mean PM2.5 measurements during the period were recorded as follows: {self.daily_mean_data}. "
            f"This data reveals variations in air quality on a day-to-day basis."
        )

        site_pm25_description = (
            f"The concentration of PM2.5 across different sites shows variability: "
            f"{', '.join([f'{site} with PM2.5 levels' for site in self.site_names])}. "
            f"These variations indicate site-specific air quality differences for the known grids."
        )

        conclusion = (
            f"Overall, the air quality report highlights the importance of monitoring and understanding the patterns of PM2.5 and PM10 concentrations in {self.grid_name}. "
            f"The analysis of the data reveals that air quality varies significantly over time, with periods of both moderate and unhealthy conditions. "
            f"It's observed that these fluctuations may be influenced by various factors, including seasonal changes. For instance, the washout effect during the rainy "
            f"season could potentially contribute to these variations. Specifically, for the period from {self.starttime} to {self.endtime}, "
            f"the PM2.5 raw values ranged from {self.daily_min_pm2_5['pm2_5_raw_value']} µg/m³ on {self.daily_min_pm2_5['date']} to {self.daily_max_pm2_5['pm2_5_raw_value']} µg/m³ on {self.daily_max_pm2_5['date']}, respectively. "
            f"This pattern underscores the importance of continuous monitoring and the implementation of "
            f"effective interventions to maintain air quality within safe limits. Ensuring good air quality is crucial for "
            f"the well-being of both residents and visitors. Therefore, it's imperative to adopt long-term "
            f"strategies and measures that can effectively mitigate the impact of factors leading to poor air quality. "
            f"In conclusion, continuous monitoring, timely intervention, and effective policies are key to maintaining good air quality and safeguarding public health."
        )

        report_content = (
            f"{introduction}\n\n"
            f"{diurnal_description}\n\n"
            f"{daily_mean_description}\n\n"
            f"{site_pm25_description}\n\n"
            f"{conclusion}"
        )

        return self._prepare_report_json(report_content)

    def _prepare_report_json(self, report_content):
        return {
            "grid_name": self.grid_name,
            "main_site": self.main_site,
            "annual_data": self.annual_data,
            "daily_mean_data": self.daily_mean_data,
            "diurnal": self.diurnal,
            "monthly_data": self.monthly_data,
            "report": report_content
        }
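Since the constructor above reaches deep into a nested response dict, a made-up minimal payload helps show the shape it expects. Every value below is invented for illustration (the keys are taken from the constructor; the real API response may carry more fields):

```python
# Invented sample matching the keys AirQualityReport.__init__ reads.
sample = {
    "airquality": {
        "sites": {"grid name": ["Example Grid"], "number_of_sites": 1},
        "annual_pm": [{"pm2_5_calibrated_value": 28.1, "pm10_calibrated_value": 40.2}],
        "daily_mean_pm": [
            {"date": "2024-01-01", "pm2_5_calibrated_value": 20.0, "pm2_5_raw_value": 21.5},
            {"date": "2024-01-02", "pm2_5_calibrated_value": 35.0, "pm2_5_raw_value": 36.1},
        ],
        "diurnal": [{"hour": 8, "pm2_5_calibrated_value": 30.0}],
        "site_monthly_mean_pm": [
            {"site_name": "Site A", "site_latitude": 0.31, "site_longitude": 32.58}
        ],
        "pm_by_month_name": [],
        "site_annual_mean_pm": [{"site_name": "Site A"}],
        "period": {"startTime": "2024-01-01T00:00:00Z", "endTime": "2024-01-31T00:00:00Z"},
    }
}

# The same min/max selection the constructor performs:
daily = sample["airquality"]["daily_mean_pm"]
daily_min = min(daily, key=lambda d: d["pm2_5_calibrated_value"])
daily_max = max(daily, key=lambda d: d["pm2_5_calibrated_value"])
```

Note that `min()`/`max()` raise `ValueError` on an empty `daily_mean_pm` list, so the constructor implicitly assumes at least one daily record.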
8 changes: 7 additions & 1 deletion src/spatial/requirements.txt
@@ -22,4 +22,10 @@ scikit-learn~=1.5.2
gcsfs~=2024.9.0.post1
joblib~=1.4.2
lightgbm~=4.1.0
numpy~=1.25.2
numpy~=1.25.2
torch
transformers
datasets
sentencepiece
huggingface_hub
⚠️ Potential issue

Add version constraints for new ML/NLP dependencies

For security and reproducibility, it's important to pin versions for ML/NLP dependencies. This is especially crucial for libraries that download and execute models or make API calls.

Apply this diff to add version constraints:

-torch
-transformers
-datasets
-sentencepiece
-huggingface_hub
-google-generativeai
+torch~=2.2.1
+transformers~=4.38.2
+datasets~=2.18.0
+sentencepiece~=0.1.99
+huggingface_hub~=0.21.4
+google-generativeai~=0.3.2

google-generativeai
42 changes: 42 additions & 0 deletions src/spatial/views/report_view.py
@@ -0,0 +1,42 @@
from flask import request, jsonify
from models.report_datafetcher import DataFetcher, AirQualityReport


class ReportView:
    @staticmethod
    def generate_air_quality_report():
        """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience."""
        print('Processing request to generate air quality report...')
🛠️ Refactor suggestion

Enhance method documentation and logging

The docstring should include:

  • Parameter descriptions
  • Return type information
  • HTTP status codes
  • Example usage

Also, replace the print statement with proper logging.

     @staticmethod
     def generate_air_quality_report():
-        """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience."""
-        print('Processing request to generate air quality report...')
+        """Generate an air quality report for a specific grid and time range.
+
+        Parameters:
+            grid_id (str): The identifier for the geographic grid
+            start_time (str): ISO format start time
+            end_time (str): ISO format end time
+            audience (str, optional): Target audience for the report. Defaults to "general public"
+
+        Returns:
+            tuple: (JSON response, HTTP status code)
+
+        Status Codes:
+            200: Success
+            400: Missing parameters
+            404: No data found
+            500: Report generation failed
+        """
+        current_app.logger.info('Processing request to generate air quality report...')



        # Extract data from the request
        data = request.json
        grid_id = data.get("grid_id")
        start_time = data.get("start_time")
        end_time = data.get("end_time")
        audience = data.get("audience", "general public")  # Default to "general public" if audience is not provided
⚠️ Potential issue

Add input validation and sanitization

The current implementation lacks:

  1. Type validation for inputs
  2. Date format validation
  3. Input sanitization
  4. Grid ID format validation

Consider implementing a validation function:

def validate_report_params(grid_id, start_time, end_time, audience):
    """Validate and sanitize report parameters."""
    try:
        # Validate grid_id format
        if not isinstance(grid_id, str) or not grid_id.strip():
            return False, "Invalid grid_id format"
            
        # Validate and parse dates
        from datetime import datetime
        try:
            start = datetime.fromisoformat(start_time)
            end = datetime.fromisoformat(end_time)
            if end <= start:
                return False, "end_time must be after start_time"
        except ValueError:
            return False, "Invalid date format. Use ISO format"
            
        # Validate audience
        valid_audiences = {"general public", "technical", "policy makers"}
        if audience.lower() not in valid_audiences:
            return False, f"audience must be one of: {valid_audiences}"
            
        return True, None
    except Exception as e:
        return False, str(e)
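A compact, runnable variant of that validator is sketched below. Note that the audience values here follow the ones `_generate_prompt` actually accepts, which differ from the set shown in the suggestion above; treat both lists as assumptions to reconcile:

```python
from datetime import datetime

# Audience values accepted by AirQualityReport._generate_prompt.
VALID_AUDIENCES = {"general public", "researcher", "policymaker"}

def validate_report_params(grid_id, start_time, end_time, audience):
    """Return (ok, error_message) for the report request parameters."""
    if not isinstance(grid_id, str) or not grid_id.strip():
        return False, "Invalid grid_id format"
    try:
        start = datetime.fromisoformat(start_time)
        end = datetime.fromisoformat(end_time)
    except (TypeError, ValueError):
        return False, "Invalid date format. Use ISO format"
    if end <= start:
        return False, "end_time must be after start_time"
    if not isinstance(audience, str) or audience.lower() not in VALID_AUDIENCES:
        return False, f"audience must be one of: {sorted(VALID_AUDIENCES)}"
    return True, None

ok, err = validate_report_params("grid-1", "2024-01-01", "2024-01-31", "researcher")
```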


        # Validate input parameters
        if not all([grid_id, start_time, end_time, audience]):
            return jsonify({"error": "Missing required parameters: grid_id, start_time, end_time, audience"}), 400
🛠️ Refactor suggestion

Enhance error message specificity

The current error handling doesn't specify which parameters are missing. This makes debugging harder for API consumers.

-        if not all([grid_id, start_time, end_time, audience]):
-            return jsonify({"error": "Missing required parameters: grid_id, start_time, end_time, audience"}), 400
+        missing_params = []
+        if not grid_id: missing_params.append("grid_id")
+        if not start_time: missing_params.append("start_time")
+        if not end_time: missing_params.append("end_time")
+        if not audience: missing_params.append("audience")
+        
+        if missing_params:
+            return jsonify({
+                "error": "Missing required parameters",
+                "missing_parameters": missing_params
+            }), 400


        # Fetch air quality data
        air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time)

        if air_quality_data is None:
            return jsonify({"error": "No data found for the given parameters"}), 404
💡 Codebase verification

🛠️ Refactor suggestion

Improve error handling and logging in both DataFetcher and report_view

The verification reveals several issues that validate the original review:

  1. DataFetcher uses print statements instead of proper logging
  2. Exceptions are swallowed and converted to None returns
  3. No timeout configuration for the HTTP request
  4. The '_a' suffix appears to be a legacy naming as no alternative implementations exist
# In src/spatial/models/report_datafetcher.py
class DataFetcher:
    @staticmethod
    def fetch_air_quality_data_a(grid_id, start_time, end_time):
        token = Config.AIRQO_API_TOKEN  
        if token is None:
-           print("Error: AIRQO_API_TOKEN environment variable is not set.")
-           return None
+           raise ValueError("AIRQO_API_TOKEN environment variable is not set")

        url = f"https://platform.airqo.net/api/v2/analytics/grid/report?token={token}"
        payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time}

        try:
-           response = requests.post(url, json=payload)
+           response = requests.post(url, json=payload, timeout=30)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as http_err:
-           print(f"HTTP error occurred: {http_err}")
+           raise RuntimeError(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as req_err:
-           print(f"Request error occurred: {req_err}")
+           raise RuntimeError(f"Request error occurred: {req_err}")
        except ValueError as json_err:
-           print(f"JSON decoding error: {json_err}")
+           raise RuntimeError(f"JSON decoding error: {json_err}")
-       return None

The original review comment's suggestion for error handling in report_view.py remains valid and should be implemented alongside these DataFetcher changes.

🔗 Analysis chain

Improve error handling and verify DataFetcher implementation

The current implementation has several concerns:

  1. The method name fetch_air_quality_data_a suggests multiple implementations
  2. No timeout handling
  3. Limited error information from DataFetcher
         # Fetch air quality data
+        try:
+            air_quality_data = DataFetcher.fetch_air_quality_data_a(
+                grid_id, start_time, end_time
+            )
+        except Exception as e:
+            current_app.logger.error(f"Error fetching data: {str(e)}")
+            return jsonify({
+                "error": "Failed to fetch air quality data",
+                "details": str(e)
+            }), 500
-        air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time)

         if air_quality_data is None:
+            current_app.logger.warning(
+                f"No data found for grid_id: {grid_id}, "
+                f"period: {start_time} to {end_time}"
+            )
             return jsonify({"error": "No data found for the given parameters"}), 404

Let me gather more information about the error handling in DataFetcher.

This will help us understand:

  1. The complete implementation of the fetch method
  2. Any other fetch methods that might exist
  3. How requests and timeouts are handled in the DataFetcher
🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Search for other fetch implementations and their usage
ast-grep --pattern 'fetch_air_quality_data_$_'

# Look for DataFetcher implementation
rg -A 10 'class DataFetcher'



Script:

#!/bin/bash
# Look for the complete implementation of fetch_air_quality_data_a
rg -A 20 'def fetch_air_quality_data_a'

# Check if there are any other fetch methods in DataFetcher
rg -A 5 'def fetch' src/spatial/models/report_datafetcher.py

# Look for any request handling or timeout configurations
rg -A 5 'requests\.' src/spatial/models/report_datafetcher.py



        # Create an air quality report
        report = AirQualityReport(air_quality_data)

        # Generate the report with the specified audience
        # json_report = report.generate_report_with_gemini(audience)  # using Google Gemini
        json_report = report.generate_report_without_llm()
        # json_report = report.generate_report_template_witout_LLM(audience)  # without LLM
        # json_report = report.generate_report_with_openai(audience)  # using the OpenAI API

        if json_report is None:
            return jsonify({"error": "Failed to generate report"}), 500

        return jsonify({"report": json_report}), 200
🛠️ Refactor suggestion

Enhance error handling and add logging for failures

The error handling for report generation failures could be more informative and include logging.

         if json_report is None:
+            current_app.logger.error(
+                f"Failed to generate report for grid_id: {grid_id}, "
+                f"period: {start_time} to {end_time}"
+            )
             return jsonify({"error": "Failed to generate report"}), 500

+        current_app.logger.info(
+            f"Successfully generated report for grid_id: {grid_id}, "
+            f"period: {start_time} to {end_time}"
+        )
         return jsonify({"report": json_report}), 200



