-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
auto reporting #3846
auto reporting #3846
Changes from 1 commit
ae73677
780463a
c9c7530
2fd6d90
ac0bcae
6ec4259
e8b5760
7bdb63e
9d8d3ab
054371f
4b7f80f
569975d
66199f1
e2b0f70
4178f4a
c14792f
e6544e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,214 @@ | ||||||||||||||||||||||
import requests | ||||||||||||||||||||||
import openai | ||||||||||||||||||||||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||||||||||||||||||||||
from huggingface_hub import login | ||||||||||||||||||||||
from configure import Config | ||||||||||||||||||||||
import google.generativeai as genai | ||||||||||||||||||||||
|
||||||||||||||||||||||
|
||||||||||||||||||||||
# Configure API keys | ||||||||||||||||||||||
GOOGLE_API_KEY = Config.GOOGLE_API_KEY | ||||||||||||||||||||||
genai.configure(api_key=GOOGLE_API_KEY) | ||||||||||||||||||||||
hf_token = Config.HUGGING_FACE_TOKEN | ||||||||||||||||||||||
|
||||||||||||||||||||||
if hf_token: | ||||||||||||||||||||||
login(hf_token) | ||||||||||||||||||||||
else: | ||||||||||||||||||||||
print("Hugging Face token is missing. Set the 'HUGGING_FACE_TOKEN' environment variable.") | ||||||||||||||||||||||
|
||||||||||||||||||||||
class DataFetcher:
    """Retrieves air-quality analytics data from the AirQo platform API."""

    @staticmethod
    def fetch_air_quality_data_a(grid_id, start_time, end_time):
        """Fetch a grid air-quality report for the given period.

        Parameters:
            grid_id: Identifier of the monitoring grid.
            start_time: Start of the reporting period (ISO string).
            end_time: End of the reporting period (ISO string).

        Returns:
            The parsed JSON response as a dict, or None on any failure
            (missing token, HTTP error, network error, undecodable JSON).
        """
        token = Config.AIRQO_API_TOKEN
        if token is None:
            print("Error: AIRQO_API_TOKEN environment variable is not set.")
            return None

        # NOTE(review): the token travels as a URL query parameter, which can
        # leak into server/proxy logs. Move it to an Authorization header once
        # the API supports it — confirm with the AirQo platform team first.
        url = f"https://platform.airqo.net/api/v2/analytics/grid/report?token={token}"
        payload = {"grid_id": grid_id, "start_time": start_time, "end_time": end_time}

        try:
            # A timeout prevents the request from hanging indefinitely when
            # the platform is unreachable.
            response = requests.post(url, json=payload, timeout=30)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as req_err:
            print(f"Request error occurred: {req_err}")
        except ValueError as json_err:
            print(f"JSON decoding error: {json_err}")

        return None
||||||||||||||||||||||
class AirQualityReport:
    """Generates audience-specific air-quality reports from AirQo analytics data.

    The constructor unpacks the API payload (top-level key ``'airquality'``);
    report text can then be produced with Google Gemini, OpenAI, or a plain
    non-LLM template. All ``generate_report_*`` methods return the dict built
    by :meth:`_prepare_report_json`.
    """

    def __init__(self, data):
        """Unpack the analytics payload and initialize the LLM clients.

        Parameters:
            data: dict as returned by the AirQo grid-report endpoint.
        """
        self.data = data
        airquality = data.get('airquality', {})
        self.grid_name = airquality.get('sites', {}).get('grid name', [None])
        # Guard against an empty 'annual_pm' list: the original indexed [0]
        # unconditionally, raising IndexError when the API returned no data.
        annual_pm = airquality.get('annual_pm') or [None]
        self.annual_data = annual_pm[0]
        self.daily_mean_data = airquality.get('daily_mean_pm', [])
        self.diurnal = airquality.get('diurnal', [])
        self.monthly_data = airquality.get('site_monthly_mean_pm', [])
        self.monthly_name_data = airquality.get('pm_by_month_name', [])
        main_site_info = self.monthly_data[0] if self.monthly_data else {}
        self.main_site = main_site_info.get('site_name')
        self.site_names = [
            item.get('site_name', None)
            for item in self.data.get('airquality', {}).get('site_annual_mean_pm', [])
        ]
        self.site_latitude = main_site_info.get('site_latitude')
        self.site_longitude = main_site_info.get('site_longitude')
        self.num_sites = airquality.get('sites', {}).get('number_of_sites')
        # Keep only the date portion (YYYY-MM-DD) of the period timestamps.
        self.starttime = airquality.get('period', {}).get('startTime', '')[:10]
        self.endtime = airquality.get('period', {}).get('endTime', '')[:10]

        # annual_data may legitimately be None when the API has no annual
        # record; the original called .get() on it directly (AttributeError).
        annual = self.annual_data or {}
        self.annual_pm2_5_calibrated_value = annual.get("pm2_5_calibrated_value")
        self.annual_pm10_calibrated_value = annual.get("pm10_calibrated_value")

        # Daily extremes; None when no daily data is available (the original
        # min()/max() raised ValueError on an empty sequence).
        if self.daily_mean_data:
            self.daily_min_pm2_5 = min(
                self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value'])
            self.daily_max_pm2_5 = max(
                self.daily_mean_data, key=lambda x: x['pm2_5_calibrated_value'])
        else:
            self.daily_min_pm2_5 = None
            self.daily_max_pm2_5 = None

        # Initialize models once in the constructor so failures surface early.
        self.gemini_model = genai.GenerativeModel('gemini-pro')
        openai.api_key = Config.OPENAI_API_KEY

    def _prepare_base_info(self):
        """Return the shared introductory sentence used by every prompt."""
        return (
            f"The air quality report is for {self.grid_name} for the period of {self.starttime} to {self.endtime}. "
            f"These air quality monitoring sites are {self.site_names} and measure PM2.5 and PM10, "
            f"at coordinates {self.site_latitude}°N, {self.site_longitude}°E. "
            f"The annual PM2.5 concentration averages {self.annual_data} µg/m³."
        )

    def _generate_prompt(self, audience):
        """Build the LLM prompt tailored to the given audience.

        Parameters:
            audience: one of 'researcher', 'policymaker', 'general public'.

        Raises:
            ValueError: for any other audience string.
        """
        base_info = self._prepare_base_info()
        # The stray f"{audience}" prefix the original prepended to each prompt
        # has been removed — it added noise, not instruction, for the model.
        if audience == "researcher":
            return (
                f"Generate a comprehensive air quality assessment report for {self.grid_name} for the period of {self.starttime} to {self.endtime}. Begin with a detailed introduction (100-130 words) covering the city's geographical location, climate characteristics, population density, and major pollution sources. "
                f"{base_info} include the period under review."
                f"Daily mean measurements show: {self.daily_mean_data}. "
                f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. "
                f"Provide a thorough analysis of spatial and temporal air quality variations, identify pollution hotspots and clean zones, examine seasonal patterns, and assess compliance with WHO guidelines. "
                f"Conclude with actionable recommendations for air quality improvement and public health protection. Data source: AirQo monitoring network."
            )
        elif audience == "policymaker":
            return (
                f"Create an executive summary of air quality conditions in {self.grid_name} for the period of {self.starttime} to {self.endtime} for policy decision-making. Begin with key findings and their policy implications (50-75 words). "
                f"{base_info} include the period under review."
                f"Highlight critical trends: {self.monthly_data}. Diurnal patterns indicate: {self.diurnal}. "
                f"Focus on: 1) Areas exceeding air quality standards, 2) Population exposure risk assessment, "
                f"3) Economic implications of poor air quality. Present clear, actionable policy recommendations with expected outcomes and implementation timeframes. "
                f"Include cost-benefit considerations and potential regulatory measures. Data source: AirQo monitoring network."
            )
        elif audience == "general public":
            return (
                f"{base_info} include the period under review."
                f"Create a clear, easy-to-understand report about air quality in {self.grid_name} for the period of {self.starttime} to {self.endtime}. Start with a simple explanation of why air quality matters for public health. "
                f"We have {self.num_sites} air quality monitors in your area. The average PM2.5 level this year is {self.annual_data} µg/m³. "
                f"Diurnal patterns indicate: {self.diurnal}. Monthly trends reveal: {self.monthly_data}. "
                f"Explain what these numbers mean for daily activities. Include: 1) When air quality is best and worst during the day, "
                f"2) Which areas have better or worse air quality, 3) Simple steps people can take to protect their health, "
                f"4) How to access daily air quality updates. Use plain language and avoid technical terms. "
                f"Add practical tips for reducing exposure to air pollution. Data source: AirQo monitoring network."
            )
        else:
            raise ValueError("Invalid audience type. Please specify 'researcher', 'policymaker', or 'general public'.")

    def generate_report_with_gemini(self, audience):
        """Generate the report text with Google Gemini ('gemini-pro')."""
        prompt = self._generate_prompt(audience)
        response = self.gemini_model.generate_content(prompt)
        gemini_output = response.text
        return self._prepare_report_json(gemini_output)

    def generate_report_with_openai(self, audience):
        """Generate the report text with OpenAI's gpt-3.5-turbo chat API."""
        prompt = self._generate_prompt(audience)
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}]
        )
        openai_output = response.choices[0].message['content']
        return self._prepare_report_json(openai_output)

    def generate_report_template_without_LLM(self, audience):
        """Use the raw prompt text itself as the report content (no LLM call)."""
        prompt = self._generate_prompt(audience)
        report_content = prompt
        return self._prepare_report_json(report_content)

    # Backward-compatible alias preserving the original misspelled name so
    # existing callers keep working.
    generate_report_template_witout_LLM = generate_report_template_without_LLM

    def generate_report_without_llm(self):
        """Build a fully templated narrative report from the unpacked data.

        Returns:
            The report dict from :meth:`_prepare_report_json`.

        Raises:
            ValueError: if diurnal data is empty (min/max of empty sequence).
        """
        # Determine peak time and least PM2.5 values from the diurnal cycle.
        peak_data = max(self.diurnal, key=lambda x: x['pm2_5_calibrated_value'])
        peak_time = peak_data['hour']
        peak_pm2_5 = peak_data['pm2_5_calibrated_value']

        least_data = min(self.diurnal, key=lambda x: x['pm2_5_calibrated_value'])
        least_pm2_5 = least_data['pm2_5_calibrated_value']
        least_pm2_5_time = least_data['hour']

        introduction = (
            f"The air quality report for {self.grid_name} covers the period from {self.starttime} to {self.endtime}. "
            f"The {self.num_sites} monitored sites include: {', '.join(self.site_names)}. "
            f"Measurements are taken for PM2.5 and PM10 concentrations. "
            f"The annual average PM2.5 concentration is {self.annual_pm2_5_calibrated_value} µg/m³."
        )

        diurnal_description = (
            f"Diurnal patterns observed include the following: {self.diurnal}. "
            f"These patterns provide insight into air quality fluctuations throughout the day. "
            f"The peak in PM2.5 {peak_pm2_5} levels occurs around {peak_time}:00 hr, indicating a period of higher pollution, often associated with increased activity or traffic. "
            f"Conversely, the period with the least PM2.5 {least_pm2_5} µg/m³ levels is around {least_pm2_5_time} :00 hr , "
            f"which usually represents a period of lower activity or better atmospheric dispersion."
            f"Understanding the patterns of pollution and their impacts on public health is crucial for effective environmental management and policy-making. "
            f"Throughout this report, we will explore key trends in PM2.5 and PM10 concentrations, the diurnal variations, and the impact of these levels on air quality across the region."
        )

        daily_mean_description = (
            f"Daily mean PM2.5 measurements during the period were recorded as follows: {self.daily_mean_data}. "
            f"This data reveals variations in air quality on a day-to-day basis."
        )

        site_pm25_description = (
            f"The concentration of PM2.5 across different sites shows variability: "
            f"{', '.join([f'{site} with PM2.5 levels' for site in self.site_names])}. "
            f"These variations indicate site-specific air quality differences for the known grids."
        )

        # The daily range sentence requires daily extremes, which are None
        # when no daily data was returned — omit it rather than crash.
        if self.daily_min_pm2_5 is not None and self.daily_max_pm2_5 is not None:
            range_sentence = (
                f" Specifically, for the period from {self.starttime} to {self.endtime},"
                f" the PM2.5 raw values ranged from {self.daily_min_pm2_5['pm2_5_raw_value']} µg/m³ on {self.daily_min_pm2_5['date']} to {self.daily_max_pm2_5['pm2_5_raw_value']} µg/m³ on {self.daily_max_pm2_5['date']}. respectively."
            )
        else:
            range_sentence = ""

        conclusion = (
            f"Overall, the air quality report highlights the importance of monitoring and understanding the patterns of PM2.5 and PM10 concentrations in the {self.grid_name} "
            f"The analysis of the data reveals that air quality varies significantly over time, with periods of both moderate and unhealthy conditions. "
            f"It’s observed that these fluctuations may be influenced by various factors, including seasonal changes. For instance, the washout effect during the rainy"
            f" season could potentially contribute to these variations."
            f"{range_sentence}"
            f"This pattern underscores the importance of continuous monitoring and the implementation of"
            f"effective interventions to maintain air quality within safe limits. Ensuring good air quality is crucial for "
            f"the well-being of both residents and visitors. Therefore, it’s imperative to adopt long-term"
            f"strategies and measures that can effectively mitigate the impact of factors leading to poor airquality."
            f"In conclusion, continuous monitoring, timely intervention, and effective policies are key to maintaining good air quality and safeguarding public health. "
        )

        report_content = (
            f"{introduction}\n\n"
            f"{diurnal_description}\n\n"
            f"{daily_mean_description}\n\n"
            f"{site_pm25_description}\n\n"
            f"{conclusion}"
        )

        return self._prepare_report_json(report_content)

    def _prepare_report_json(self, report_content):
        """Wrap report text together with the underlying data in a dict."""
        return {
            "grid_name": self.grid_name,
            "main_site": self.main_site,
            "annual_data": self.annual_data,
            "daily_mean_data": self.daily_mean_data,
            "diurnal": self.diurnal,
            "monthly_data": self.monthly_data,
            "report": report_content
        }
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -22,4 +22,10 @@ scikit-learn~=1.5.2 | |||||||||||||||||||||||||
gcsfs~=2024.9.0.post1 | ||||||||||||||||||||||||||
joblib~=1.4.2 | ||||||||||||||||||||||||||
lightgbm~=4.1.0 | ||||||||||||||||||||||||||
numpy~=1.25.2 | ||||||||||||||||||||||||||
numpy~=1.25.2 | ||||||||||||||||||||||||||
torch | ||||||||||||||||||||||||||
transformers | ||||||||||||||||||||||||||
datasets | ||||||||||||||||||||||||||
sentencepiece | ||||||||||||||||||||||||||
huggingface_hub | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add version constraints for new ML/NLP dependencies For security and reproducibility, it's important to pin versions for ML/NLP dependencies. This is especially crucial for libraries that download and execute models or make API calls. Apply this diff to add version constraints: -torch
-transformers
-datasets
-sentencepiece
-huggingface_hub
-google-generativeai
+torch~=2.2.1
+transformers~=4.38.2
+datasets~=2.18.0
+sentencepiece~=0.1.99
+huggingface_hub~=0.21.4
+google-generativeai~=0.3.2 📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||
google-generativeai |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,42 @@ | ||||||||||||||||||||||||||||||||
from flask import request, jsonify | ||||||||||||||||||||||||||||||||
from models.report_datafetcher import DataFetcher, AirQualityReport | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
class ReportView:
    @staticmethod
    def generate_air_quality_report():
        """Generate an air quality report for a grid over a time range.

        Expects a JSON request body with:
            grid_id (str): identifier for the geographic grid.
            start_time (str): start of the reporting period.
            end_time (str): end of the reporting period.
            audience (str, optional): target audience; defaults to
                "general public".

        Returns:
            tuple: (Flask JSON response, HTTP status code) —
            200 on success, 400 for missing parameters, 404 when no data
            is found, 500 when report generation fails.
        """
        print('Processing request to generate air quality report...')

        # get_json(silent=True) returns None instead of raising (or being
        # None via request.json) when the body is absent or not valid JSON.
        data = request.get_json(silent=True) or {}
        grid_id = data.get("grid_id")
        start_time = data.get("start_time")
        end_time = data.get("end_time")
        audience = data.get("audience", "general public")  # Default to "general public" if audience is not provided

        # Validate input parameters, reporting exactly which ones are missing
        # so API consumers can debug their requests.
        missing = [name for name, value in (
            ("grid_id", grid_id),
            ("start_time", start_time),
            ("end_time", end_time),
            ("audience", audience),
        ) if not value]
        if missing:
            return jsonify({
                "error": "Missing required parameters",
                "missing_parameters": missing,
            }), 400

        # Fetch air quality data (returns None on any fetch failure).
        air_quality_data = DataFetcher.fetch_air_quality_data_a(grid_id, start_time, end_time)

        if air_quality_data is None:
            return jsonify({"error": "No data found for the given parameters"}), 404

        # Create an air quality report
        report = AirQualityReport(air_quality_data)

        # Generate the report. Alternatives:
        #   report.generate_report_with_gemini(audience)   # Google Gemini
        #   report.generate_report_with_openai(audience)   # OpenAI API
        #   report.generate_report_template_witout_LLM(audience)  # prompt-only
        json_report = report.generate_report_without_llm()

        if json_report is None:
            return jsonify({"error": "Failed to generate report"}), 500

        return jsonify({"report": json_report}), 200
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Remove unused imports from the transformers library.
The
AutoModelForCausalLM
andAutoTokenizer
imports are not used in the current implementation.-from transformers import AutoModelForCausalLM, AutoTokenizer
📝 Committable suggestion
🧰 Tools
🪛 Ruff
3-3:
transformers.AutoModelForCausalLM
imported but unusedRemove unused import
(F401)
3-3:
transformers.AutoTokenizer
imported but unusedRemove unused import
(F401)