From ae736779a63d43d62ead77d3ddafab3306b2acb4 Mon Sep 17 00:00:00 2001
From: wabinyai
Date: Thu, 14 Nov 2024 11:23:39 +0300
Subject: [PATCH 01/17] auto reporting

**_WHAT DOES THIS PR DO?_**

This PR implements a new API endpoint for generating comprehensive air quality reports with spatial analysis.

- [x] Adds a `POST /air_quality_report` endpoint to the spatial service, routed through `controllers.py` to a new `ReportView`.
- [x] Adds `DataFetcher` and `AirQualityReport` models that fetch grid analytics data and generate audience-specific reports via Google Gemini, OpenAI, or a rule-based template, along with the `HUGGING_FACE_TOKEN`, `GOOGLE_API_KEY`, and `OPENAI_API_KEY` configuration variables and the supporting Python dependencies.

**_WHAT ISSUES ARE RELATED TO THIS PR?_**

- Jira cards
  - []
- GitHub issues
  - Closes #

**_HOW DO I TEST OUT THIS PR?_**

- [x] Spatial: [Link to Spatial](https://github.com/airqo-platform/AirQo-api/tree/staging/src/spatial)

**_WHICH ENDPOINTS SHOULD BE READY FOR TESTING?:_**

- [x] `POST /air_quality_report`

**_ARE THERE ANY RELATED PRs?_**

- [ ] Related PR 1
- [ ] Related PR 2

---
 src/spatial/configure.py                 |   4 +-
 src/spatial/controllers/controllers.py   |   5 +
 src/spatial/models/report_datafetcher.py | 214 +++++++++++++++++++++++
 src/spatial/requirements.txt             |   8 +-
 src/spatial/views/report_view.py         |  42 +++++
 5 files changed, 271 insertions(+), 2 deletions(-)
 create mode 100644 src/spatial/models/report_datafetcher.py
 create mode 100644 src/spatial/views/report_view.py

diff --git a/src/spatial/configure.py b/src/spatial/configure.py
index 561196e70b..b09ccc2033 100644
--- a/src/spatial/configure.py
+++ b/src/spatial/configure.py
@@ -25,7 +25,9 @@ class
Config: "BIGQUERY_SATELLITE_MODEL_PREDICTIONS" ) - + HUGGING_FACE_TOKEN = os.getenv("HUGGING_FACE_TOKEN") + GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") + OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") class ProductionConfig(Config): DEBUG = False TESTING = False diff --git a/src/spatial/controllers/controllers.py b/src/spatial/controllers/controllers.py index df1570f519..26ebc4b86d 100644 --- a/src/spatial/controllers/controllers.py +++ b/src/spatial/controllers/controllers.py @@ -12,6 +12,7 @@ from views.satellite_predictions import SatellitePredictionView from views.site_category_view import SiteCategorizationView from views.site_selection_views import SiteSelectionView +from views.report_view import ReportView controller_bp = Blueprint("controller", __name__) @@ -66,3 +67,7 @@ def site_selection(): @controller_bp.route("/satellite_prediction", methods=["POST"]) def get_satellite_prediction(): return SatellitePredictionView.make_predictions() + +@controller_bp.route("/air_quality_report", methods=["POST"]) +def fetch_air_quality(): + return ReportView.generate_air_quality_report() \ No newline at end of file diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py new file mode 100644 index 0000000000..52317650ae --- /dev/null +++ b/src/spatial/models/report_datafetcher.py @@ -0,0 +1,214 @@ +import requests +import openai +from transformers import AutoModelForCausalLM, AutoTokenizer +from huggingface_hub import login +from configure import Config +import google.generativeai as genai + + +# Configure API keys +GOOGLE_API_KEY = Config.GOOGLE_API_KEY +genai.configure(api_key=GOOGLE_API_KEY) +hf_token = Config.HUGGING_FACE_TOKEN + +if hf_token: + login(hf_token) +else: + print("Hugging Face token is missing. Set the 'HUGGING_FACE_TOKEN' environment variable.") + +class DataFetcher: + @staticmethod + def fetch_air_quality_data_a(grid_id, start_time, end_time): + token = Config.AIRQO_API_TOKEN + if token is None: + print("Error: AIRQO_API_TOKEN environment variable is not set.") + return None + + url = f"https://platform.airqo.net/api/v2/analytics/grid/report?token={token}" + payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time} + + try: + response = requests.post(url, json=payload) + response.raise_for_status() + return response.json() + except requests.exceptions.HTTPError as http_err: + print(f"HTTP error occurred: {http_err}") + except requests.exceptions.RequestException as req_err: + print(f"Request error occurred: {req_err}") + except ValueError as json_err: + print(f"JSON decoding error: {json_err}") + + return None + +class AirQualityReport: + def __init__(self, data): + self.data = data + self.grid_name = data.get('airquality', {}).get('sites', {}).get('grid name', [None]) + self.annual_data = data.get('airquality', {}).get('annual_pm', [None])[0] + self.daily_mean_data = data.get('airquality', {}).get('daily_mean_pm', []) + self.diurnal = data.get('airquality', {}).get('diurnal', []) + self.monthly_data = data.get('airquality', {}).get('site_monthly_mean_pm', []) + self.monthly_name_data = data.get('airquality', {}).get('pm_by_month_name', []) + main_site_info = self.monthly_data[0] if self.monthly_data else {} + self.main_site = main_site_info.get('site_name') + self.site_names = [item.get('site_name', None) for item in self.data.get('airquality', {}).get('site_annual_mean_pm', [])] + self.site_latitude = main_site_info.get('site_latitude') + self.site_longitude = main_site_info.get('site_longitude') + self.num_sites = 
data.get('airquality', {}).get('sites', {}).get('number_of_sites') + self.starttime = data.get('airquality', {}).get('period', {}).get('startTime', '')[:10] + self.endtime = data.get('airquality', {}).get('period', {}).get('endTime', '')[:10] + + self.annual_pm2_5_calibrated_value = self.annual_data.get("pm2_5_calibrated_value") + self.annual_pm10_calibrated_value = self.annual_data.get("pm10_calibrated_value") + + # Finding the minimum and maximum values + self.daily_min_pm2_5 = min(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) + self.daily_max_pm2_5 = max(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) + + + + # Initialize models once in the constructor + self.gemini_model = genai.GenerativeModel('gemini-pro') + openai.api_key = Config.OPENAI_API_KEY + + def _prepare_base_info(self): + return ( + f"The air quality report is for {self.grid_name} for the period of {self.starttime} to {self.endtime}. " + f"These air quality monitoring sites are {self.site_names} and measure PM2.5 and PM10, " + f"at coordinates {self.site_latitude}°N, {self.site_longitude}°E. " + f"The annual PM2.5 concentration averages {self.annual_data} µg/m³." + ) + + def _generate_prompt(self, audience): + base_info = self._prepare_base_info() + if audience == "researcher": + return ( + f"{audience}" + + f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources. " + f"{base_info} include the period under review." + f"Daily mean measurements show: {self.daily_mean_data}. " + f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. " + f"Provide a thorough analysis of spatial and temporal air quality variations, identify pollution hotspots and clean zones, examine seasonal patterns, and assess compliance with WHO guidelines. " + f"Conclude with actionable recommendations for air quality improvement and public health protection. Data source: AirQo monitoring network." + ) + elif audience == "policymaker": + return ( + f"{audience}" + f"Create an executive summary of air quality conditions in {self.grid_name} for the period of {self.starttime} to {self.endtime}. for policy decision-making. Begin with key findings and their policy implications (50-75 words). " + f"{base_info} include the period under review." + f"Highlight critical trends: {self.monthly_data}. Diurnal patterns indicate: {self.diurnal}. " + f"Focus on: 1) Areas exceeding air quality standards, 2) Population exposure risk assessment, " + f"3) Economic implications of poor air quality. Present clear, actionable policy recommendations with expected outcomes and implementation timeframes. " + f"Include cost-benefit considerations and potential regulatory measures. Data source: AirQo monitoring network." + ) + elif audience == "general public": + return ( + f"{audience}" + f"{base_info} include the period under review." + f"Create a clear, easy-to-understand report about air quality in {self.grid_name} for the period of {self.starttime} to {self.endtime}. Start with a simple explanation of why air quality matters for public health. " + f"We have {self.num_sites} air quality monitors in your area. The average PM2.5 level this year is {self.annual_data} µg/m³. " + f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. 
" + f"Explain what these numbers mean for daily activities. Include: 1) When air quality is best and worst during the day, " + f"2) Which areas have better or worse air quality, 3) Simple steps people can take to protect their health, " + f"4) How to access daily air quality updates. Use plain language and avoid technical terms. " + f"Add practical tips for reducing exposure to air pollution. Data source: AirQo monitoring network." + ) + else: + raise ValueError("Invalid audience type. Please specify 'researcher', 'policymaker', or 'general public'.") + + def generate_report_with_gemini(self, audience): + prompt = self._generate_prompt(audience) + response = self.gemini_model.generate_content(prompt) + gemini_output = response.text + return self._prepare_report_json(gemini_output) + + def generate_report_with_openai(self, audience): + prompt = self._generate_prompt(audience) + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": prompt}] + ) + openai_output = response.choices[0].message['content'] + return self._prepare_report_json(openai_output) + + # Use non-LLM template text as report content + def generate_report_template_witout_LLM(self, audience): + prompt = self._generate_prompt(audience) + report_content = prompt + return self._prepare_report_json(report_content) + + def generate_report_without_llm(self): + # Determine peak time and least PM2.5 values + peak_data = max(self.diurnal, key=lambda x: x['pm2_5_calibrated_value']) + peak_time = peak_data['hour'] + peak_pm2_5 = peak_data['pm2_5_calibrated_value'] + + least_data = min(self.diurnal, key=lambda x: x['pm2_5_calibrated_value']) + least_pm2_5 = least_data['pm2_5_calibrated_value'] + least_pm2_5_time = least_data['hour'] + + + + introduction = ( + f"The air quality report for {self.grid_name} covers the period from {self.starttime} to {self.endtime}. " + f"The {self.num_sites} monitored sites include: {', '.join(self.site_names)}. " + f"Measurements are taken for PM2.5 and PM10 concentrations. " + f"The annual average PM2.5 concentration is {self.annual_pm2_5_calibrated_value} µg/m³." + ) + + diurnal_description = ( + f"Diurnal patterns observed include the following: {self.diurnal}. " + f"These patterns provide insight into air quality fluctuations throughout the day. " + f"The peak in PM2.5 {peak_pm2_5} levels occurs around {peak_time}:00 hr, indicating a period of higher pollution, often associated with increased activity or traffic. " + f"Conversely, the period with the least PM2.5 {least_pm2_5} µg/m³ levels is around {least_pm2_5_time} :00 hr , " + f"which usually represents a period of lower activity or better atmospheric dispersion." + f"Understanding the patterns of pollution and their impacts on public health is crucial for effective environmental management and policy-making. " + f"Throughout this report, we will explore key trends in PM2.5 and PM10 concentrations, the diurnal variations, and the impact of these levels on air quality across the region." + + ) + + daily_mean_description = ( + f"Daily mean PM2.5 measurements during the period were recorded as follows: {self.daily_mean_data}. " + f"This data reveals variations in air quality on a day-to-day basis." + ) + + site_pm25_description = ( + f"The concentration of PM2.5 across different sites shows variability: " + f"{', '.join([f'{site} with PM2.5 levels' for site in self.site_names])}. " + f"These variations indicate site-specific air quality differences for the known grids." 
+ ) + conclusion = ( + f"Overall, the air quality report highlights the importance of monitoring and understanding the patterns of PM2.5 and PM10 concentrations in the {self.grid_name} " + f"The analysis of the data reveals that air quality varies significantly over time, with periods of both moderate and unhealthy conditions. " + f"It’s observed that these fluctuations may be influenced by various factors, including seasonal changes. For instance, the washout effect during the rainy" + f" season could potentially contribute to these variations. Specifically, for the period from {self.starttime} to {self.endtime}," + f" the PM2.5 raw values ranged from {self.daily_min_pm2_5['pm2_5_raw_value']} µg/m³ on {self.daily_min_pm2_5['date']} to {self.daily_max_pm2_5['pm2_5_raw_value']} µg/m³ on {self.daily_max_pm2_5['date']}. respectively." + f"This pattern underscores the importance of continuous monitoring and the implementation of" + f"effective interventions to maintain air quality within safe limits. Ensuring good air quality is crucial for " + f"the well-being of both residents and visitors. Therefore, it’s imperative to adopt long-term" + f"strategies and measures that can effectively mitigate the impact of factors leading to poor airquality." + f"In conclusion, continuous monitoring, timely intervention, and effective policies are key to maintaining good air quality and safeguarding public health. " + ) + + report_content = ( + f"{introduction}\n\n" + f"{diurnal_description}\n\n" + f"{daily_mean_description}\n\n" + f"{site_pm25_description}\n\n" + f"{conclusion}" + ) + + + return self._prepare_report_json(report_content) + + def _prepare_report_json(self, report_content): + return { + "grid_name": self.grid_name, + "main_site": self.main_site, + "annual_data": self.annual_data, + "daily_mean_data": self.daily_mean_data, + "diurnal": self.diurnal, + "monthly_data": self.monthly_data, + "report": report_content + } \ No newline at end of file diff --git a/src/spatial/requirements.txt b/src/spatial/requirements.txt index 8f4fda8f10..d584220be0 100644 --- a/src/spatial/requirements.txt +++ b/src/spatial/requirements.txt @@ -22,4 +22,10 @@ scikit-learn~=1.5.2 gcsfs~=2024.9.0.post1 joblib~=1.4.2 lightgbm~=4.1.0 -numpy~=1.25.2 \ No newline at end of file +numpy~=1.25.2 +torch +transformers +datasets +sentencepiece +huggingface_hub +google-generativeai \ No newline at end of file diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py new file mode 100644 index 0000000000..c8015ea963 --- /dev/null +++ b/src/spatial/views/report_view.py @@ -0,0 +1,42 @@ +from flask import request, jsonify +from models.report_datafetcher import DataFetcher, AirQualityReport + +class ReportView: + @staticmethod + def generate_air_quality_report(): + """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience.""" + print('Processing request to generate air quality report...') + + # Extract data from the request + data = request.json + grid_id = data.get("grid_id") + start_time = data.get("start_time") + end_time = data.get("end_time") + audience = data.get("audience", "general public") # Default to "general public" if audience is not provided + + # Validate input parameters + if not all([grid_id, start_time, end_time, audience]): + return jsonify({"error": "Missing required parameters: grid_id, start_time, end_time, audience"}), 400 + + # Fetch air quality data + air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time) + + 
if air_quality_data is None: + return jsonify({"error": "No data found for the given parameters"}), 404 + + # Create an air quality report + report = AirQualityReport(air_quality_data) + + # Generate the report with the specified audience + # json_report = report.generate_report_with_gemini(audience) # using google gemini + json_report = report.generate_report_without_llm() + # json_report = report.generate_report_template_witout_LLM(audience) # without LLM + # json_report = report.generate_report_with_openai(audience) # Using openai api + + + if json_report is None: + return jsonify({"error": "Failed to generate report"}), 500 + + return jsonify({"report": json_report}), 200 + + \ No newline at end of file From 780463a7e4b2b7de16a908468c6a2de1f5aae5a6 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 11:45:35 +0300 Subject: [PATCH 02/17] review --- src/spatial/views/report_view.py | 91 ++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 29 deletions(-) diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index c8015ea963..85eb11bfc0 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -1,42 +1,75 @@ from flask import request, jsonify from models.report_datafetcher import DataFetcher, AirQualityReport +import logging class ReportView: @staticmethod def generate_air_quality_report(): """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience.""" - print('Processing request to generate air quality report...') + logging.info('Processing request to generate air quality report...') + + # Extract and validate request data + data = ReportView._get_request_data() + if isinstance(data, dict) and data.get("error"): + return jsonify(data), 400 - # Extract data from the request - data = request.json - grid_id = data.get("grid_id") - start_time = data.get("start_time") - end_time = data.get("end_time") - audience = data.get("audience", "general public") # Default to "general public" if audience is not provided + grid_id = data["grid_id"] + start_time = data["start_time"] + end_time = data["end_time"] + audience = data.get("audience", "general public") # Default value if not provided - # Validate input parameters - if not all([grid_id, start_time, end_time, audience]): - return jsonify({"error": "Missing required parameters: grid_id, start_time, end_time, audience"}), 400 + try: + # Fetch air quality data + air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time) + if not air_quality_data: + logging.warning(f"No data found for grid_id: {grid_id}, start_time: {start_time}, end_time: {end_time}") + return jsonify({"error": "No data found for the given parameters"}), 404 - # Fetch air quality data - air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time) + # Generate the air quality report + report = AirQualityReport(air_quality_data) + json_report = ReportView._generate_report(report, audience) + if not json_report: + logging.error("Failed to generate the air quality report.") + return jsonify({"error": "Failed to generate report"}), 500 - if air_quality_data is None: - return jsonify({"error": "No data found for the given parameters"}), 404 + return jsonify({"report": json_report}), 200 - # Create an air quality report - report = AirQualityReport(air_quality_data) - - # Generate the report with the specified audience - # json_report = report.generate_report_with_gemini(audience) # using google gemini - json_report 
= report.generate_report_without_llm() - # json_report = report.generate_report_template_witout_LLM(audience) # without LLM - # json_report = report.generate_report_with_openai(audience) # Using openai api - - - if json_report is None: - return jsonify({"error": "Failed to generate report"}), 500 + except Exception as e: + logging.exception("Unexpected error occurred during report generation") + return jsonify({"error": "An unexpected error occurred"}), 500 - return jsonify({"report": json_report}), 200 - - \ No newline at end of file + @staticmethod + def _get_request_data(): + """Helper function to extract and validate request data.""" + try: + data = request.get_json() + if not data: + logging.error("Invalid input: JSON data expected.") + return {"error": "Invalid input. JSON data expected."} + + required_params = ["grid_id", "start_time", "end_time"] + missing_params = [param for param in required_params if not data.get(param)] + if missing_params: + logging.error(f"Missing required parameters: {', '.join(missing_params)}") + return {"error": f"Missing required parameters: {', '.join(missing_params)}"} + + return data + except Exception as e: + logging.exception("Error parsing input data.") + return {"error": "Error parsing input data."} + + @staticmethod + def _generate_report(report, audience): + """Helper function to generate report based on the audience type.""" + try: + # Generate the report with the specified audience + # You can extend this logic to switch between different report generation methods + json_report = report.generate_report_without_llm() # Example of one option + # json_report = report.generate_report_with_gemini(audience) # Using Google Gemini (if applicable) + # json_report = report.generate_report_with_openai(audience) # Using OpenAI API (if applicable) + # json_report = report.generate_report_template_without_llm(audience) # Without LLM, audience-specific + + return json_report + except Exception as e: + logging.exception("Error generating the report.") + return None From c9c7530d7f0609e1b40d9e7be7c0f69bbf246b26 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 12:14:43 +0300 Subject: [PATCH 03/17] report --- src/spatial/views/report_view.py | 91 ++++++++++---------------------- 1 file changed, 29 insertions(+), 62 deletions(-) diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index 85eb11bfc0..c8015ea963 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -1,75 +1,42 @@ from flask import request, jsonify from models.report_datafetcher import DataFetcher, AirQualityReport -import logging class ReportView: @staticmethod def generate_air_quality_report(): """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience.""" - logging.info('Processing request to generate air quality report...') - - # Extract and validate request data - data = ReportView._get_request_data() - if isinstance(data, dict) and data.get("error"): - return jsonify(data), 400 - - grid_id = data["grid_id"] - start_time = data["start_time"] - end_time = data["end_time"] - audience = data.get("audience", "general public") # Default value if not provided + print('Processing request to generate air quality report...') - try: - # Fetch air quality data - air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time) - if not air_quality_data: - logging.warning(f"No data found for grid_id: {grid_id}, start_time: {start_time}, end_time: {end_time}") - return 
jsonify({"error": "No data found for the given parameters"}), 404 + # Extract data from the request + data = request.json + grid_id = data.get("grid_id") + start_time = data.get("start_time") + end_time = data.get("end_time") + audience = data.get("audience", "general public") # Default to "general public" if audience is not provided - # Generate the air quality report - report = AirQualityReport(air_quality_data) - json_report = ReportView._generate_report(report, audience) - if not json_report: - logging.error("Failed to generate the air quality report.") - return jsonify({"error": "Failed to generate report"}), 500 + # Validate input parameters + if not all([grid_id, start_time, end_time, audience]): + return jsonify({"error": "Missing required parameters: grid_id, start_time, end_time, audience"}), 400 - return jsonify({"report": json_report}), 200 + # Fetch air quality data + air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time) - except Exception as e: - logging.exception("Unexpected error occurred during report generation") - return jsonify({"error": "An unexpected error occurred"}), 500 + if air_quality_data is None: + return jsonify({"error": "No data found for the given parameters"}), 404 - @staticmethod - def _get_request_data(): - """Helper function to extract and validate request data.""" - try: - data = request.get_json() - if not data: - logging.error("Invalid input: JSON data expected.") - return {"error": "Invalid input. JSON data expected."} - - required_params = ["grid_id", "start_time", "end_time"] - missing_params = [param for param in required_params if not data.get(param)] - if missing_params: - logging.error(f"Missing required parameters: {', '.join(missing_params)}") - return {"error": f"Missing required parameters: {', '.join(missing_params)}"} - - return data - except Exception as e: - logging.exception("Error parsing input data.") - return {"error": "Error parsing input data."} + # Create an air quality report + report = AirQualityReport(air_quality_data) + + # Generate the report with the specified audience + # json_report = report.generate_report_with_gemini(audience) # using google gemini + json_report = report.generate_report_without_llm() + # json_report = report.generate_report_template_witout_LLM(audience) # without LLM + # json_report = report.generate_report_with_openai(audience) # Using openai api + + + if json_report is None: + return jsonify({"error": "Failed to generate report"}), 500 - @staticmethod - def _generate_report(report, audience): - """Helper function to generate report based on the audience type.""" - try: - # Generate the report with the specified audience - # You can extend this logic to switch between different report generation methods - json_report = report.generate_report_without_llm() # Example of one option - # json_report = report.generate_report_with_gemini(audience) # Using Google Gemini (if applicable) - # json_report = report.generate_report_with_openai(audience) # Using OpenAI API (if applicable) - # json_report = report.generate_report_template_without_llm(audience) # Without LLM, audience-specific - - return json_report - except Exception as e: - logging.exception("Error generating the report.") - return None + return jsonify({"report": json_report}), 200 + + \ No newline at end of file From 2fd6d9049c35084600b4db2006016fd7e532fd34 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 13:04:51 +0300 Subject: [PATCH 04/17] required --- src/spatial/requirements.txt | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/src/spatial/requirements.txt b/src/spatial/requirements.txt index d584220be0..acdb1a952e 100644 --- a/src/spatial/requirements.txt +++ b/src/spatial/requirements.txt @@ -23,9 +23,11 @@ gcsfs~=2024.9.0.post1 joblib~=1.4.2 lightgbm~=4.1.0 numpy~=1.25.2 +numpy torch transformers datasets sentencepiece huggingface_hub -google-generativeai \ No newline at end of file +google-generativeai +openai \ No newline at end of file From ac0bcae43b1a6df5ab496dfe89439accc19b7c9e Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 20:28:54 +0300 Subject: [PATCH 05/17] RE --- src/spatial/models/report_datafetcher.py | 9 +++------ src/spatial/views/report_view.py | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index 52317650ae..e501f30dce 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -73,6 +73,7 @@ def __init__(self, data): def _prepare_base_info(self): return ( + f"The air quality report is for {self.grid_name} for the period of {self.starttime} to {self.endtime}. " f"These air quality monitoring sites are {self.site_names} and measure PM2.5 and PM10, " f"at coordinates {self.site_latitude}°N, {self.site_longitude}°E. " @@ -82,9 +83,7 @@ def _prepare_base_info(self): def _generate_prompt(self, audience): base_info = self._prepare_base_info() if audience == "researcher": - return ( - f"{audience}" - + return ( f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources. " f"{base_info} include the period under review." f"Daily mean measurements show: {self.daily_mean_data}. " @@ -94,7 +93,6 @@ def _generate_prompt(self, audience): ) elif audience == "policymaker": return ( - f"{audience}" f"Create an executive summary of air quality conditions in {self.grid_name} for the period of {self.starttime} to {self.endtime}. for policy decision-making. Begin with key findings and their policy implications (50-75 words). " f"{base_info} include the period under review." f"Highlight critical trends: {self.monthly_data}. Diurnal patterns indicate: {self.diurnal}. " @@ -103,8 +101,7 @@ def _generate_prompt(self, audience): f"Include cost-benefit considerations and potential regulatory measures. Data source: AirQo monitoring network." ) elif audience == "general public": - return ( - f"{audience}" + return ( f"{base_info} include the period under review." f"Create a clear, easy-to-understand report about air quality in {self.grid_name} for the period of {self.starttime} to {self.endtime}. Start with a simple explanation of why air quality matters for public health. " f"We have {self.num_sites} air quality monitors in your area. The average PM2.5 level this year is {self.annual_data} µg/m³. 
" diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index c8015ea963..b31556c91f 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -28,9 +28,9 @@ def generate_air_quality_report(): report = AirQualityReport(air_quality_data) # Generate the report with the specified audience - # json_report = report.generate_report_with_gemini(audience) # using google gemini - json_report = report.generate_report_without_llm() - # json_report = report.generate_report_template_witout_LLM(audience) # without LLM + json_report = report.generate_report_with_gemini(audience) # using google gemini + # json_report = report.generate_report_without_llm() + # json_report = report.generate_text_with_gpt_neo(audience) # without LLM # json_report = report.generate_report_with_openai(audience) # Using openai api From 6ec42596be4bda350f83b6df069286eefc1a45ff Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 20:39:54 +0300 Subject: [PATCH 06/17] LOGGING --- src/spatial/views/report_view.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index b31556c91f..745131104b 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -1,11 +1,15 @@ from flask import request, jsonify from models.report_datafetcher import DataFetcher, AirQualityReport +import logging +# Set up a basic logger configuration +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) class ReportView: @staticmethod def generate_air_quality_report(): """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience.""" - print('Processing request to generate air quality report...') + logger.info('Received request to generate air quality report.') # Extract data from the request data = request.json From e8b5760dfb6e87db16e72223f343c8e54b9b73c1 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 21:51:32 +0300 Subject: [PATCH 07/17] Report --- src/spatial/views/report_view.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index 745131104b..fbd100764e 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -1,15 +1,11 @@ from flask import request, jsonify -from models.report_datafetcher import DataFetcher, AirQualityReport -import logging -# Set up a basic logger configuration -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +from models.report_datafetcher import DataFetcher, AirQualityReport class ReportView: @staticmethod def generate_air_quality_report(): """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience.""" - logger.info('Received request to generate air quality report.') + # print('Processing request to generate air quality report...') # Extract data from the request data = request.json From 7bdb63e801525d55e1b079e5f4d34bd71aab0f06 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 22:13:39 +0300 Subject: [PATCH 08/17] url --- src/spatial/configure.py | 2 +- src/spatial/models/report_datafetcher.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/spatial/configure.py b/src/spatial/configure.py index b09ccc2033..f07eb81cff 100644 --- a/src/spatial/configure.py +++ b/src/spatial/configure.py @@ -24,7 +24,7 @@ class Config: 
BIGQUERY_SATELLITE_MODEL_PREDICTIONS = os.getenv( "BIGQUERY_SATELLITE_MODEL_PREDICTIONS" ) - + ANALTICS_URL = os.getenv("ANALTICS_URL") HUGGING_FACE_TOKEN = os.getenv("HUGGING_FACE_TOKEN") GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index e501f30dce..f36ed169db 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -11,6 +11,7 @@ genai.configure(api_key=GOOGLE_API_KEY) hf_token = Config.HUGGING_FACE_TOKEN + if hf_token: login(hf_token) else: @@ -20,11 +21,12 @@ class DataFetcher: @staticmethod def fetch_air_quality_data_a(grid_id, start_time, end_time): token = Config.AIRQO_API_TOKEN + analtics_url = Config.ANALTICS_URL if token is None: print("Error: AIRQO_API_TOKEN environment variable is not set.") return None - url = f"https://platform.airqo.net/api/v2/analytics/grid/report?token={token}" + url= f"{analtics_url}?token={token}" payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time} try: From 9d8d3aba7c113301f0cf200870024859a183c5ee Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 23:17:37 +0300 Subject: [PATCH 09/17] report apis --- src/spatial/controllers/controllers.py | 6 +- src/spatial/views/report_view.py | 96 ++++++++++++++++++-------- 2 files changed, 74 insertions(+), 28 deletions(-) diff --git a/src/spatial/controllers/controllers.py b/src/spatial/controllers/controllers.py index 26ebc4b86d..a1f2895c71 100644 --- a/src/spatial/controllers/controllers.py +++ b/src/spatial/controllers/controllers.py @@ -70,4 +70,8 @@ def get_satellite_prediction(): @controller_bp.route("/air_quality_report", methods=["POST"]) def fetch_air_quality(): - return ReportView.generate_air_quality_report() \ No newline at end of file + return ReportView.generate_air_quality_report_with_gemini() + +@controller_bp.route("/air_quality_report_without_llm", methods=["POST"]) +def fetch_air_quality_without_llm(): + return ReportView.generate_air_quality_report_without_llm() \ No newline at end of file diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index fbd100764e..5c9b483b71 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -1,42 +1,84 @@ from flask import request, jsonify -from models.report_datafetcher import DataFetcher, AirQualityReport +from models.report_datafetcher import DataFetcher, AirQualityReport class ReportView: @staticmethod - def generate_air_quality_report(): - """Fetch air quality data and generate a report based on grid_id, start_time, end_time, and audience.""" - # print('Processing request to generate air quality report...') - - # Extract data from the request + def _fetch_and_validate_request_data(): + """Extract and validate request data.""" data = request.json grid_id = data.get("grid_id") start_time = data.get("start_time") end_time = data.get("end_time") - audience = data.get("audience", "general public") # Default to "general public" if audience is not provided + audience = data.get("audience", "general public") # Validate input parameters if not all([grid_id, start_time, end_time, audience]): - return jsonify({"error": "Missing required parameters: grid_id, start_time, end_time, audience"}), 400 + return None, jsonify({ + "error": "Missing required parameters: grid_id, start_time, end_time, audience" + }), 400 # Fetch air quality data air_quality_data = 
DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time) - if air_quality_data is None: - return jsonify({"error": "No data found for the given parameters"}), 404 - - # Create an air quality report - report = AirQualityReport(air_quality_data) - - # Generate the report with the specified audience - json_report = report.generate_report_with_gemini(audience) # using google gemini - # json_report = report.generate_report_without_llm() - # json_report = report.generate_text_with_gpt_neo(audience) # without LLM - # json_report = report.generate_report_with_openai(audience) # Using openai api - - - if json_report is None: - return jsonify({"error": "Failed to generate report"}), 500 - - return jsonify({"report": json_report}), 200 - - \ No newline at end of file + return None, jsonify({ + "error": "No data found for the given parameters" + }), 404 + + return data, air_quality_data, None + @staticmethod + def _handle_error(exception): + return jsonify({ + "error": str(exception) + }), 500 + + @staticmethod + def generate_air_quality_report_with_gemini(): + """Generate a report using Gemini model.""" + data, air_quality_data, error_response = ReportView._fetch_and_validate_request_data() + if error_response: + return error_response + + try: + # Create an air quality report + report = AirQualityReport(air_quality_data) + # Generate the report with Gemini + json_report = report.generate_report_with_gemini(data.get("audience", "general public")) + + if json_report is None: + return jsonify({ + "error": "Failed to generate report with Gemini" + }), 500 + + return jsonify({ + "report": json_report, + "model": "gemini" + }), 200 + + except Exception as e: + return ReportView._handle_error(e) + + @staticmethod + def generate_air_quality_report_without_llm(): + """Generate a report without using LLM.""" + data, air_quality_data, error_response = ReportView._fetch_and_validate_request_data() + if error_response: + return error_response + + try: + # Create an air quality report + report = AirQualityReport(air_quality_data) + # Generate the report without LLM + json_report = report.generate_report_without_llm() + + if json_report is None: + return jsonify({ + "error": "Failed to generate report" + }), 500 + + return jsonify({ + "report": json_report, + "model": "rule_based" + }), 200 + + except Exception as e: + return ReportView._handle_error(e) From 054371fa00cb23ee8e268050ebc5c02e2b3b3613 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 23:24:02 +0300 Subject: [PATCH 10/17] logging --- src/spatial/views/report_view.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index 5c9b483b71..ca36581cbf 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -28,7 +28,8 @@ def _fetch_and_validate_request_data(): @staticmethod def _handle_error(exception): return jsonify({ - "error": str(exception) + # "error": str(exception) + "error": "An internal error has occurred!" 
}), 500 @staticmethod From 4b7f80f33561a3929abc3eac86665c851696cb36 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 14 Nov 2024 23:55:01 +0300 Subject: [PATCH 11/17] llm --- src/spatial/models/report_datafetcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index f36ed169db..55ee0874f2 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -132,7 +132,7 @@ def generate_report_with_openai(self, audience): return self._prepare_report_json(openai_output) # Use non-LLM template text as report content - def generate_report_template_witout_LLM(self, audience): + def generate_report_template_without_LLM(self, audience): prompt = self._generate_prompt(audience) report_content = prompt return self._prepare_report_json(report_content) From 569975d2b43b27155258647ea0e6b3d28dbe0a4e Mon Sep 17 00:00:00 2001 From: wabinyai Date: Fri, 15 Nov 2024 00:22:03 +0300 Subject: [PATCH 12/17] report --- src/spatial/views/report_view.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index ca36581cbf..06860ec93f 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -83,3 +83,4 @@ def generate_air_quality_report_without_llm(): except Exception as e: return ReportView._handle_error(e) + From 66199f1aac4a030f9305415dec2f7a9d64306431 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 21 Nov 2024 13:49:42 +0300 Subject: [PATCH 13/17] ai report --- src/spatial/models/report_datafetcher.py | 78 +++++++++++++++--------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index 55ee0874f2..73b502b017 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -4,6 +4,7 @@ from huggingface_hub import login from configure import Config import google.generativeai as genai +import logging # Configure API keys @@ -21,12 +22,12 @@ class DataFetcher: @staticmethod def fetch_air_quality_data_a(grid_id, start_time, end_time): token = Config.AIRQO_API_TOKEN - analtics_url = Config.ANALTICS_URL + analytics_url = Config.ANALTICS_URL if token is None: print("Error: AIRQO_API_TOKEN environment variable is not set.") return None - url= f"{analtics_url}?token={token}" + url= f"{analytics_url}?token={token}" payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time} try: @@ -35,10 +36,13 @@ def fetch_air_quality_data_a(grid_id, start_time, end_time): return response.json() except requests.exceptions.HTTPError as http_err: print(f"HTTP error occurred: {http_err}") + logging.error(f"HTTP error occurred: {http_err}") except requests.exceptions.RequestException as req_err: print(f"Request error occurred: {req_err}") + logging.error(f"Request error occurred: {req_err}") except ValueError as json_err: print(f"JSON decoding error: {json_err}") + logging.error(f"JSON decoding error: {json_err}") return None @@ -64,10 +68,13 @@ def __init__(self, data): self.annual_pm10_calibrated_value = self.annual_data.get("pm10_calibrated_value") # Finding the minimum and maximum values - self.daily_min_pm2_5 = min(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) - self.daily_max_pm2_5 = max(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) - - + if self.daily_mean_data: + self.daily_min_pm2_5 = 
min(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) + self.daily_max_pm2_5 = max(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) + else: + self.daily_min_pm2_5 = None + self.daily_max_pm2_5 = None + # Initialize models once in the constructor self.gemini_model = genai.GenerativeModel('gemini-pro') @@ -86,13 +93,19 @@ def _generate_prompt(self, audience): base_info = self._prepare_base_info() if audience == "researcher": return ( - f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources. " - f"{base_info} include the period under review." - f"Daily mean measurements show: {self.daily_mean_data}. " - f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. " - f"Provide a thorough analysis of spatial and temporal air quality variations, identify pollution hotspots and clean zones, examine seasonal patterns, and assess compliance with WHO guidelines. " - f"Conclude with actionable recommendations for air quality improvement and public health protection. Data source: AirQo monitoring network." - ) +- f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources. " +- f"{base_info} include the period under review." +- f"Daily mean measurements show: {self.daily_mean_data}. " +- f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. " ++ f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources.\n" ++ f"{base_info}\n" ++ f"Daily mean measurements show values ranging from {self.daily_min_pm2_5['pm2_5_calibrated_value']} to {self.daily_max_pm2_5['pm2_5_calibrated_value']} µg/m³.\n" ++ f"Diurnal patterns indicate peak pollution levels at {self._format_diurnal_peak()}.\n" ++ f"Monthly trends reveal fluctuations correlated with seasonal changes.\n" + f"Provide a thorough analysis of spatial and temporal air quality variations, identify pollution hotspots and clean zones, examine seasonal patterns, and assess compliance with WHO guidelines. " + f"Conclude with actionable recommendations for air quality improvement and public health protection. Data source: AirQo monitoring network." + ) + elif audience == "policymaker": return ( f"Create an executive summary of air quality conditions in {self.grid_name} for the period of {self.starttime} to {self.endtime}. for policy decision-making. Begin with key findings and their policy implications (50-75 words). " @@ -106,7 +119,7 @@ def _generate_prompt(self, audience): return ( f"{base_info} include the period under review." f"Create a clear, easy-to-understand report about air quality in {self.grid_name} for the period of {self.starttime} to {self.endtime}. Start with a simple explanation of why air quality matters for public health. " - f"We have {self.num_sites} air quality monitors in your area. 
The average PM2.5 level this year is {self.annual_data} µg/m³. " + f"We have {self.num_sites} air quality monitors in your area. The average PM2.5 level this year is {self.annual_pm2_5_calibrated_value} µg/m³. " f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. " f"Explain what these numbers mean for daily activities. Include: 1) When air quality is best and worst during the day, " f"2) Which areas have better or worse air quality, 3) Simple steps people can take to protect their health, " @@ -124,12 +137,17 @@ def generate_report_with_gemini(self, audience): def generate_report_with_openai(self, audience): prompt = self._generate_prompt(audience) - response = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": prompt}] - ) - openai_output = response.choices[0].message['content'] - return self._prepare_report_json(openai_output) + try: + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": prompt}] + ) + openai_output = response.choices[0].message['content'] + return self._prepare_report_json(openai_output) + except Exception as e: + print(f"Error: {e}") + return None + # Use non-LLM template text as report content def generate_report_template_without_LLM(self, audience): @@ -139,14 +157,18 @@ def generate_report_template_without_LLM(self, audience): def generate_report_without_llm(self): # Determine peak time and least PM2.5 values - peak_data = max(self.diurnal, key=lambda x: x['pm2_5_calibrated_value']) - peak_time = peak_data['hour'] - peak_pm2_5 = peak_data['pm2_5_calibrated_value'] - - least_data = min(self.diurnal, key=lambda x: x['pm2_5_calibrated_value']) - least_pm2_5 = least_data['pm2_5_calibrated_value'] - least_pm2_5_time = least_data['hour'] - + if self.diurnal: + peak_data = max(self.diurnal, key=lambda x: x['pm2_5_calibrated_value']) + peak_time = peak_data['hour'] + peak_pm2_5 = peak_data['pm2_5_calibrated_value'] + least_data = min(self.diurnal, key=lambda x: x['pm2_5_calibrated_value']) + least_pm2_5 = least_data['pm2_5_calibrated_value'] + least_pm2_5_time = least_data['hour'] + else: + peak_time = None + peak_pm2_5 = None + least_pm2_5 = None + least_pm2_5_time = None introduction = ( From e2b0f70cc0a6b6a16f24e5d52af1cceae76c7407 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 21 Nov 2024 15:18:42 +0300 Subject: [PATCH 14/17] gemini --- src/spatial/models/report_datafetcher.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index 73b502b017..49bd9e054d 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -131,9 +131,14 @@ def _generate_prompt(self, audience): def generate_report_with_gemini(self, audience): prompt = self._generate_prompt(audience) - response = self.gemini_model.generate_content(prompt) - gemini_output = response.text - return self._prepare_report_json(gemini_output) + try: + response = self.gemini_model.generate_content(prompt) + gemini_output = response.text + return self._prepare_report_json(gemini_output) + except Exception as e: + print(f"Error: {e}") + return None + def generate_report_with_openai(self, audience): prompt = self._generate_prompt(audience) From 4178f4a1f5256754fa407c3c3ece85a249e03519 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Thu, 21 Nov 2024 15:19:09 +0300 Subject: [PATCH 15/17] Gemini --- src/spatial/models/report_datafetcher.py 
| 1 - 1 file changed, 1 deletion(-) diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index 49bd9e054d..842ee4b446 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -139,7 +139,6 @@ def generate_report_with_gemini(self, audience): print(f"Error: {e}") return None - def generate_report_with_openai(self, audience): prompt = self._generate_prompt(audience) try: From c14792f8a0d549b014d830e5837f3e4a6b98c4a3 Mon Sep 17 00:00:00 2001 From: wabinyai Date: Tue, 26 Nov 2024 17:53:28 +0300 Subject: [PATCH 16/17] chat --- src/spatial/controllers/controllers.py | 6 ++++- src/spatial/models/report_datafetcher.py | 32 +++++++++++++++++++++++- src/spatial/views/report_view.py | 26 +++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/src/spatial/controllers/controllers.py b/src/spatial/controllers/controllers.py index a1f2895c71..4318936d92 100644 --- a/src/spatial/controllers/controllers.py +++ b/src/spatial/controllers/controllers.py @@ -74,4 +74,8 @@ def fetch_air_quality(): @controller_bp.route("/air_quality_report_without_llm", methods=["POST"]) def fetch_air_quality_without_llm(): - return ReportView.generate_air_quality_report_without_llm() \ No newline at end of file + return ReportView.generate_air_quality_report_without_llm() + +@controller_bp.route("/air_quality_report_with_customised_prompt", methods=["POST"]) +def fetch_air_quality_with_customised_prompt(): + return ReportView.generate_air_quality_report_with_customised_prompt_gemini() \ No newline at end of file diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index 842ee4b446..70583433fc 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -55,6 +55,8 @@ def __init__(self, data): self.diurnal = data.get('airquality', {}).get('diurnal', []) self.monthly_data = data.get('airquality', {}).get('site_monthly_mean_pm', []) self.monthly_name_data = data.get('airquality', {}).get('pm_by_month_name', []) + self.site_annual_mean_pm = data.get('airquality', {}).get('site_annual_mean_pm', []) + self.site_mean_pm = data.get('airquality', {}).get('site_mean_pm', []) main_site_info = self.monthly_data[0] if self.monthly_data else {} self.main_site = main_site_info.get('site_name') self.site_names = [item.get('site_name', None) for item in self.data.get('airquality', {}).get('site_annual_mean_pm', [])] @@ -74,7 +76,7 @@ def __init__(self, data): else: self.daily_min_pm2_5 = None self.daily_max_pm2_5 = None - + # Initialize models once in the constructor self.gemini_model = genai.GenerativeModel('gemini-pro') @@ -138,6 +140,27 @@ def generate_report_with_gemini(self, audience): except Exception as e: print(f"Error: {e}") return None + # Generate report with customised prompt + def generate_report_with_customised_prompt_gemini(self, custom_prompt): + """ + Generate an air quality report using a customised user-provided prompt. + """ + base_info = self._prepare_base_info() + full_prompt = ( + + f"{base_info} include the period under review." + f"diurnal patterns indicate: {self.diurnal}. " + f"number of sites or devices or airqo binos: {self.num_sites}. 
" + f"{self.daily_mean_data}" + f"site mean{self.site_mean_pm}" + f"{custom_prompt}" + ) + try: + response = self.gemini_model.generate_content(full_prompt) + gemini_output = response.text + return self._prepare_customised_report_json(gemini_output) + except Exception as e: + print(f"Error: {e}") def generate_report_with_openai(self, audience): prompt = self._generate_prompt(audience) @@ -236,4 +259,11 @@ def _prepare_report_json(self, report_content): "diurnal": self.diurnal, "monthly_data": self.monthly_data, "report": report_content + } + + def _prepare_customised_report_json(self, report_content): + return { + "grid_name": self.grid_name, + "start_end_time": self.starttime + " to " + self.endtime, + "report": report_content } \ No newline at end of file diff --git a/src/spatial/views/report_view.py b/src/spatial/views/report_view.py index 06860ec93f..2a4199cb19 100644 --- a/src/spatial/views/report_view.py +++ b/src/spatial/views/report_view.py @@ -10,6 +10,7 @@ def _fetch_and_validate_request_data(): start_time = data.get("start_time") end_time = data.get("end_time") audience = data.get("audience", "general public") + customise_prompt = data.get("customise_prompt", "") # Optional customization # Validate input parameters if not all([grid_id, start_time, end_time, audience]): @@ -84,3 +85,28 @@ def generate_air_quality_report_without_llm(): except Exception as e: return ReportView._handle_error(e) + + @staticmethod + def generate_air_quality_report_with_customised_prompt_gemini(): + data, air_quality_data, error_response = ReportView._fetch_and_validate_request_data() + if error_response: + return error_response + + try: + # Create an air quality report + report = AirQualityReport(air_quality_data) + # Generate the report with Gemini + json_report = report.generate_report_with_customised_prompt_gemini(data.get("customise_prompt", "")) + + if json_report is None: + return jsonify({ + "error": "Failed to generate report with Gemini" + }), 500 + + return jsonify({ + "report": json_report, + "model": "gemini" + }), 200 + + except Exception as e: + return ReportView._handle_error(e) From e6544e049800de41369d6563e06aae574f12b6ca Mon Sep 17 00:00:00 2001 From: wabinyai Date: Tue, 26 Nov 2024 23:12:00 +0300 Subject: [PATCH 17/17] daily mean --- src/spatial/models/report_datafetcher.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/spatial/models/report_datafetcher.py b/src/spatial/models/report_datafetcher.py index 70583433fc..07e8232270 100644 --- a/src/spatial/models/report_datafetcher.py +++ b/src/spatial/models/report_datafetcher.py @@ -5,6 +5,7 @@ from configure import Config import google.generativeai as genai import logging +from functools import lru_cache # Configure API keys @@ -20,6 +21,7 @@ class DataFetcher: @staticmethod + @lru_cache(maxsize=128) # Cache up to 128 most recent queries def fetch_air_quality_data_a(grid_id, start_time, end_time): token = Config.AIRQO_API_TOKEN analytics_url = Config.ANALTICS_URL @@ -71,8 +73,16 @@ def __init__(self, data): # Finding the minimum and maximum values if self.daily_mean_data: - self.daily_min_pm2_5 = min(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) - self.daily_max_pm2_5 = max(self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value']) + filtered_data = [ + item for item in self.daily_mean_data + if 'pm2_5_calibrated_value' in item and isinstance(item['pm2_5_calibrated_value'], (int, float)) + ] + if filtered_data: + self.daily_min_pm2_5 = min(filtered_data, key=lambda x: 
x['pm2_5_calibrated_value']) + self.daily_max_pm2_5 = max(filtered_data, key=lambda x: x['pm2_5_calibrated_value']) + else: + self.daily_min_pm2_5 = None + self.daily_max_pm2_5 = None else: self.daily_min_pm2_5 = None self.daily_max_pm2_5 = None @@ -141,6 +151,7 @@ def generate_report_with_gemini(self, audience): print(f"Error: {e}") return None # Generate report with customised prompt + @lru_cache(maxsize=64) # Cache up to 64 most recent reports def generate_report_with_customised_prompt_gemini(self, custom_prompt): """ Generate an air quality report using a customised user-provided prompt. @@ -153,6 +164,7 @@ def generate_report_with_customised_prompt_gemini(self, custom_prompt): f"number of sites or devices or airqo binos: {self.num_sites}. " f"{self.daily_mean_data}" f"site mean{self.site_mean_pm}" + f" daily {self.daily_mean_data}" f"{custom_prompt}" ) try:
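
**_REVIEWER NOTE: EXERCISING THE NEW REPORT ENDPOINTS_**

A minimal request sketch for the endpoints added across this patch series is shown below. It assumes the spatial Flask service is running locally at `http://localhost:5000` with the controller blueprint registered at the application root, and that `AIRQO_API_TOKEN`, `ANALTICS_URL`, and `GOOGLE_API_KEY` are set in the environment; the base URL, dates, and `grid_id` are placeholder values and are not defined anywhere in this PR.

```python
# Reviewer sketch: exercise the report endpoints added in this patch series.
# Assumptions: the spatial service is reachable at BASE_URL and the controller
# blueprint is mounted at the root path; grid_id and the dates are placeholders.
import requests

BASE_URL = "http://localhost:5000"  # assumed local host/port, not part of this PR

payload = {
    "grid_id": "<grid_id>",        # placeholder: replace with a real grid identifier
    "start_time": "2024-01-01",    # example period; format must match the analytics API
    "end_time": "2024-01-31",
    "audience": "general public",  # or "researcher" / "policymaker"
}

# Rule-based report (no LLM call; needs AIRQO_API_TOKEN and ANALTICS_URL only)
r = requests.post(f"{BASE_URL}/air_quality_report_without_llm", json=payload)
print(r.status_code, r.json())

# Gemini-backed report (requires GOOGLE_API_KEY)
r = requests.post(f"{BASE_URL}/air_quality_report", json=payload)
print(r.status_code, r.json())

# Gemini report driven by a customised prompt (added in the later patches)
payload["customise_prompt"] = "Summarise the key PM2.5 trends in two short paragraphs."
r = requests.post(f"{BASE_URL}/air_quality_report_with_customised_prompt", json=payload)
print(r.status_code, r.json())
```

A successful call returns HTTP 200 with a JSON body containing the generated `report` and the `model` used; missing parameters and empty analytics responses are intended to return 400 and 404 respectively, per the validation logic in `ReportView`.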