Skip to content

Commit

Permalink
Merge pull request #4177 from NicholasTurner23/update-fix/Clean_up
Browse files: browse the repository at this point in the history
Update fix/clean up
  • Loading branch information
Baalmart authored Jan 7, 2025
2 parents 676af0c + e54abfb commit 425f5c2
Showing 1 changed file with 19 additions and 10 deletions.
29 changes: 19 additions & 10 deletions src/workflows/airqo_etl_utils/data_validator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -81,16 +81,15 @@ def format_data_types(
for col in integers:
data[col] = (
data[col]
.fillna(
""
) # Replace NaNs with empty strings to avoid errors during string operations
.astype(str) # Ensure the column is a string
.str.replace(
r"[^\d]", "", regex=True
) # Remove non-numeric characters
.str.strip() # Strip leading/trailing whitespace
.replace("", -1) # Replace empty strings with -1
.astype(np.int64) # Convert to integer
.fillna("") # Replace NaN with empty strings
.astype(str) # Convert to string
.str.strip() # Remove leading/trailing whitespace
.replace("", np.nan) # Replace empty strings with NaN for clarity
.apply(
lambda x: pd.to_numeric(x, errors="coerce")
) # Convert to numeric
.fillna(-1) # Replace NaN with -1 for invalid/missing values
.astype(np.int64) # Convert to integer type
)

return data
Expand Down Expand Up @@ -275,6 +274,16 @@ def process_data_for_api(data: pd.DataFrame) -> list:
cols = bigquery_api.get_columns(bigquery_api.hourly_measurements_table)
cols.append("battery")
data = DataValidationUtils.fill_missing_columns(data, cols=cols)
data["device_number"] = (
data["device_number"]
.fillna("")
.astype(str)
.str.strip()
.replace("", np.nan)
.apply(lambda x: pd.to_numeric(x, errors="coerce"))
.fillna(-1)
.astype(np.int64)
)

for _, row in data.iterrows():
try:
Expand Down

0 comments on commit 425f5c2

Please sign in to comment.