Commit

Initial commit
akarich73 committed Oct 19, 2024
1 parent 0f56603 commit bce1b50
Showing 15 changed files with 466 additions and 390 deletions.
3 changes: 2 additions & 1 deletion .idea/barra2-dl.iml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion barra2_dl/__init__.py
@@ -1 +1 @@
from . import globals, helpers, downloaders
from . import globals, downloader, merger, helpers
12 changes: 9 additions & 3 deletions barra2_dl/downloaders.py → barra2_dl/downloader.py
@@ -6,6 +6,7 @@
from pathlib import Path
import calendar
from .helpers import list_months
from .helpers_geo import format_lat_lon
from .globals import LatLonPoint, LatLonBBox, barra2_aus11_index


@@ -52,7 +53,7 @@ def barra2_point_downloader(base_url: str,
lat_lon_point: LatLonPoint,
start_datetime: str | datetime,
end_datetime: str | datetime,
fileout_prefix: str,
fileout_prefix: str = None,
fileout_folder: str = 'cache',
fileout_type: str = 'csv_file') -> None:
"""Download barra2 data based on the url and variables list
@@ -76,10 +77,11 @@ def barra2_point_downloader(base_url: str,
Change from using os to pathlib
"""
# set default fileout_prefix if not set by user

# loop through each variable requested for download as each variable is saved in a separate url
for var in barra2_var:
# loop through each month as each BARRA2 file is saved by month
# loop through each month as each BARRA2 file is saved by month todo check index enumerate addition works
for date in list_months(start_datetime, end_datetime, freq="MS"):
year = date.year
month = date.month
@@ -97,8 +99,12 @@ def barra2_point_downloader(base_url: str,
folder_path = fileout_folder
download_file(url, folder_path, fileout_name, create_folder=True)

return
# todo add option to name file_prefix using BARRA2 node; might need index 0 check
# if fileout_prefix is None:
# fileout_prefix = BARRA2_aus11_index[lat_lon_point['lat']][lat_lon_point['lon']]


return
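For reference, a minimal sketch of the per-variable, per-month loop that barra2_point_downloader runs, using the names visible in this diff. The example variable list, file-name pattern, and the commented-out download call are assumptions for illustration, not the package's exact code.

from barra2_dl.helpers import list_months
from barra2_dl.globals import barra2_aus11_csv_url

barra2_vars = ['ua10m', 'va10m']      # assumed example variable list
start_datetime = '2023-01-01T00:00'
end_datetime = '2023-03-31T23:00'
fileout_prefix = 'demo'               # hypothetical prefix
fileout_folder = 'cache'

for var in barra2_vars:
    # one BARRA2 file per variable per month
    for date in list_months(start_datetime, end_datetime, freq='MS'):
        year, month = date.year, date.month
        url = barra2_aus11_csv_url.format(var=var, year=year, month=month)
        fileout_name = f'{fileout_prefix}_{var}_{year}{month:02d}.csv'   # assumed naming
        # download_file(url, fileout_folder, fileout_name, create_folder=True)
        print(url, '->', fileout_name)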



9 changes: 6 additions & 3 deletions barra2_dl/globals.py
@@ -40,16 +40,19 @@ class LatLonBBox(TypedDict):
# VARIABLES
# -----------------------------------------------------------------------------

# barra2_aus11_extents
barra2_aus11_lat_lon_bbox = LatLonBBox(north=-23.0, west=133.0, east=134.0, south=-24)
# barra2_aus11_extents http://www.bom.gov.au/research/publications/researchreports/BRR-067.pdf
# todo consider updating Barra2 to Barra-r2 to match published data convention
barra_r2_aus11_lat_lon_bbox = LatLonBBox(north=12.95, south=-57.97, east=207.39, west=88.48)

barra_r2_grid_spacing = 0.11

# base thredds url for BARRA2 11km 1hour reanalysis data
barra2_aus11_csv_url = ("https://thredds.nci.org.au/thredds/ncss/grid/ob53/output/reanalysis/AUS-11/BOM/ERA5"
"/historical/hres/BARRA-R2/v1/1hr/{var}/latest/"
"{var}_AUS-11_ERA5_historical_hres_BOM_BARRA-R2_v1_1hr_{year}{month:02d}-{year}{month:02d}.nc")

# index for barra2 used to join separate files
barra2_aus11_index = ['time', 'station', 'latitude[unit="degrees_north"]', 'longitude[unit="degrees_east"]']
barra2_aus11_index = ['time','station', 'latitude[unit="degrees_north"]', 'longitude[unit="degrees_east"]']

# BARRA2 wind speed variable pairs
barra2_aus11_wind_all = [('ua10m', 'va10m', '10m[unit="m s-1"]'),
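As an illustration, the URL template above expands like this for one variable and month; any point-subset query parameters the downloader appends (latitude, longitude, time window, output format) are not shown in this diff.

from barra2_dl.globals import barra2_aus11_csv_url

url = barra2_aus11_csv_url.format(var='ua10m', year=2024, month=3)
# https://thredds.nci.org.au/thredds/ncss/grid/ob53/output/reanalysis/AUS-11/BOM/ERA5
# /historical/hres/BARRA-R2/v1/1hr/ua10m/latest/
# ua10m_AUS-11_ERA5_historical_hres_BOM_BARRA-R2_v1_1hr_202403-202403.nc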
273 changes: 43 additions & 230 deletions barra2_dl/helpers.py
@@ -1,11 +1,9 @@
"""
This module contains helper functions.
General helper functions.
"""

from pathlib import Path
import pandas as pd
import numpy as np
import fnmatch
from typing import List, Union


@@ -25,118 +23,6 @@ def list_months(start_datetime: str, end_datetime: str, freq: str ='MS', **kwarg
return df_to_list
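The body of list_months is collapsed in this diff; a plausible sketch, assuming it wraps pandas.date_range with a month-start frequency:

import pandas as pd

def list_months_sketch(start_datetime: str, end_datetime: str, freq: str = 'MS', **kwargs) -> list[pd.Timestamp]:
    # One timestamp per month start between the two datetimes (inclusive).
    return pd.date_range(start=start_datetime, end=end_datetime, freq=freq, **kwargs).to_list()

# list_months_sketch('2023-01-01', '2023-03-31')
# -> month starts 2023-01-01, 2023-02-01, 2023-03-01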


def list_csv_files(folder_path):
"""
List all CSV files in the given folder.
Args:
folder_path (str): The path to the folder containing the CSV files.
Returns:
list: A list of CSV file names in the folder.
"""
folder = Path(folder_path)
csv_files = [file.name for file in folder.glob('*.csv')]
return csv_files


def filter_list_using_wildcard(input_list: list[str], pattern:str):
"""
Filter a list using a wildcard pattern.
Args:
input_list (list[str]): The list of strings to be filtered.
pattern (str): The wildcard pattern to filter the list.
Returns:
list: A list of strings that match the wildcard pattern.
"""
filtered_list = fnmatch.filter(input_list, pattern)
return filtered_list


def merge_csv_files_to_dataframe(filein_folder: str,
filename_pattern: str = '*.csv',
index_for_join: str = None) -> pd.DataFrame:
"""
Merge csv files from a folder based on optional filename wildcard using fnmatch.
If filename wildcard is omitted all csv files in the folder will be merged.
If fileout_folder is omitted the merged file will be saved in the filein_folder.
Args:
filein_folder (str): Optional
filename_pattern (str):
index_for_join (str):
Returns:
return_type: None.
Todo:
Change from using os to pathlib
"""

# todo add .csv check for filename_prefix


# list all csv files in folder
csv_files = list_csv_files(filein_folder)

# filter csv files
csv_files_filtered = filter_list_using_wildcard(csv_files, filename_pattern)

# initiate dataframe for combined csv results
df_combined = pd.DataFrame()

for file in Path(filein_folder).glob(filename_pattern):
if df_combined.empty:
# read csv file without indexing to retain time as column for join
df_combined = pd.read_csv(file)
else:
# read next file into new df
df_add = pd.read_csv(file)
# combine on index join if not None, otherwise just concat together
if index_for_join is not None:
df_combined = df_combined.join(df_add.set_index(index_for_join),on=index_for_join)
else:
df_combined = pd.concat([df_combined, df_add], ignore_index = True)

return df_combined
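Hypothetical usage of merge_csv_files_to_dataframe (removed from helpers.py in this commit, presumably in favour of the new merger module): joining one CSV per variable for the same point and period on the shared BARRA2 index columns, so only the value columns are appended. The file-name pattern is an assumption.

from barra2_dl.globals import barra2_aus11_index

df = merge_csv_files_to_dataframe(
    filein_folder='cache',
    filename_pattern='demo_*_202401.csv',   # hypothetical naming for one month's files
    index_for_join=barra2_aus11_index,      # time/station/lat/lon as the join key
)
print(df.head())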


def export_dataframe_to_csv(dataframe: pd.DataFrame,
fileout_folder: str | Path,
fileout_name: str,
create_folder: bool = True) -> None:
"""
Export a DataFrame to a CSV file in the specified folder with the given file name.
Args:
dataframe (pd.DataFrame): The Pandas DataFrame to export.
fileout_folder (str or Path): The path to the folder where the CSV file will be saved.
fileout_name (str): The name of the CSV file to save.
create_folder (bool): If True, creates the folder if it does not exist; otherwise, exits if the folder doesn't exist.
Returns:
Path: The path of the saved CSV file.
"""
fileout_folder = Path(fileout_folder)
# Check if the folder exists
if not fileout_folder.exists():
if create_folder:
fileout_folder.mkdir(parents=True)
print(f"The folder '{fileout_folder}' was created.")
else:
print(f"The folder '{fileout_folder}' does not exist. Exiting...")
return

# Define the full path for the CSV file
fileout_path_name = fileout_folder / fileout_name

# Export the DataFrame to CSV
dataframe.to_csv(fileout_path_name, index=False)

return fileout_path_name


def get_timestamp_range_list(dataframe: pd.DataFrame, timestamp_column: str) -> List[pd.Timestamp]:
"""
@@ -193,121 +79,48 @@ def combine_csv_files(file_paths: list[str | Path], output_file: str | Path, ind

return

# todo draft function to process csvs
def process_csvs:
# process barra2 variables to wind speed and direction todo split into modules

# initiate DataFrame for adding new columns
df_processed = df_combined

# loop through df_combined to df_processed
for tup in barra2_wind_speeds:
mask_wind_speed_h = tup[0][2:]
mask_ua = df_combined.columns.str.contains(tup[0]) # selects column header
mask_va = df_combined.columns.str.contains(tup[1]) # selects column header

if np.any(mask_ua == True) and np.any(mask_va == True):
df_processed_ua = df_combined.loc[:, mask_ua] # selects mask
df_processed_va = df_combined.loc[:, mask_va] # selects mask

print('Converted: ' + tup.__str__())

df_processed_v = pd.DataFrame(np.sqrt(df_processed_ua.iloc[:, 0] ** 2 + df_processed_va.iloc[:, 0] ** 2))
df_processed_v.columns = ['v' + mask_wind_speed_h + '[unit="m s-1"]']

df_processed_phi_met = pd.DataFrame()

for index, row in df_combined.iterrows():
if (df_processed_ua.iloc[index, 0] == 0) and (df_processed_va.iloc[index, 0] == 0):
df_processed_phi_met.loc[index, 'v' + mask_wind_speed_h + '_' + 'phi_met[unit="degrees"]'] = 0.0
else:
df_processed_phi_met.loc[index, 'v' + mask_wind_speed_h + '_' + 'phi_met[unit="degrees"]'] = (
np.mod(180 + np.rad2deg(
np.arctan2(df_processed_ua.iloc[index, 0], df_processed_va.iloc[index, 0])), 360))

# Merge the current variable DataFrame with the combined DataFrame
df_processed = df_processed.join(df_processed_v)
df_processed = df_processed.join(df_processed_phi_met)

# export combined to csv
df_processed.to_csv(
os.path.join(output_dir,
f"{output_filename_prefix}_processed_{start_date_time.strftime("%Y%m%d")}_{end_date_time.strftime("%Y%m%d")}.csv"))

return


# todo add tests
def calculate_wind_speed(u: Union[float, int], v: Union[float, int]) -> float:
"""
Args:
u: The u component of the wind vector, which can be a float or an int.
v: The v component of the wind vector, which can be a float or an int.
Returns:
Wind speed. If both u and v are zero, it returns 0.0.
"""
if u == 0 and v == 0:
return 0.0
return np.sqrt(u ** 2 + v ** 2)

def wind_components_to_speed(ua: Union[float, int, List[float], List[int]], va: Union[float, int, List[float], List[int]]) -> Union[float, List[float]]:
"""
Convert wind components ua and va to wind speed v.
Args:
ua (Union[float, int, List[float], List[int]]): The u-component of the wind.
va (Union[float, int, List[float], List[int]]): The v-component of the wind.
Returns:
float or List[float]: The calculated wind speed.
Raises:
ValueError: If the input types do not match or if they are neither List[float] nor float.
"""
if isinstance(ua, (float, int)) and isinstance(va, (float, int)):
if ua == 0 and va == 0:
return 0.0
return calculate_wind_speed(ua, va)
elif isinstance(ua, list) and isinstance(va, list):
if not all(isinstance(num, (float, int)) for num in ua + va):
raise ValueError("All elements in both lists must be either float or int.")
if len(ua) != len(va):
raise ValueError("Both lists must be of the same length.")
return [calculate_wind_speed(u, v) for u, v in zip(ua, va)]
else:
raise ValueError("Both arguments must be either both float/int or both lists of float/int.")
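A quick check of the new speed helpers (expected values shown in comments):

from barra2_dl.helpers import calculate_wind_speed, wind_components_to_speed

calculate_wind_speed(3.0, 4.0)                      # 5.0
wind_components_to_speed([3.0, 0.0], [4.0, 0.0])    # [5.0, 0.0]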

# # todo draft function to process csvs
# def process_csvs:
# # process barra2 variables to wind speed and direction todo split into modules
#
# # initiate DataFrame for adding new columns
# df_processed = df_combined
#
# # loop through df_combined to df_processed
# for tup in barra2_wind_speeds:
# mask_wind_speed_h = tup[0][2:]
# mask_ua = df_combined.columns.str.contains(tup[0]) # selects column header
# mask_va = df_combined.columns.str.contains(tup[1]) # selects column header
#
# if np.any(mask_ua == True) and np.any(mask_va == True):
# df_processed_ua = df_combined.loc[:, mask_ua] # selects mask
# df_processed_va = df_combined.loc[:, mask_va] # selects mask
#
# print('Converted: ' + tup.__str__())
#
# df_processed_v = pd.DataFrame(np.sqrt(df_processed_ua.iloc[:, 0] ** 2 + df_processed_va.iloc[:, 0] ** 2))
# df_processed_v.columns = ['v' + mask_wind_speed_h + '[unit="m s-1"]']
#
# df_processed_phi_met = pd.DataFrame()
#
# for index, row in df_combined.iterrows():
# if (df_processed_ua.iloc[index, 0] == 0) and (df_processed_va.iloc[index, 0] == 0):
# df_processed_phi_met.loc[index, 'v' + mask_wind_speed_h + '_' + 'phi_met[unit="degrees"]'] = 0.0
# else:
# df_processed_phi_met.loc[index, 'v' + mask_wind_speed_h + '_' + 'phi_met[unit="degrees"]'] = (
# np.mod(180 + np.rad2deg(
# np.arctan2(df_processed_ua.iloc[index, 0], df_processed_va.iloc[index, 0])), 360))
#
# # Merge the current variable DataFrame with the combined DataFrame
# df_processed = df_processed.join(df_processed_v)
# df_processed = df_processed.join(df_processed_phi_met)
#
# # export combined to csv
# df_processed.to_csv(
# os.path.join(output_dir,
# f"{output_filename_prefix}_processed_{start_date_time.strftime("%Y%m%d")}_{end_date_time.strftime("%Y%m%d")}.csv"))
#
# return

# todo add tests
def calculate_wind_direction(u: Union[float, int], v: Union[float, int]) -> float:
"""
Args:
u: The u component of the wind vector, which can be a float or an int.
v: The v component of the wind vector, which can be a float or an int.
Returns:
Wind direction in degrees. If both u and v are zero, it returns 0.0.
"""
if u == 0 and v == 0:
return 0.0
return np.mod(180 + np.rad2deg(np.arctan2(u, v)), 360)

def wind_components_to_direction(ua: Union[float, int, List[float], List[int]], va: Union[float, int, List[float], List[int]]) -> Union[float, List[float]]:
"""
Convert wind components ua and va to wind direction phi.
Args:
ua (Union[float, int, List[float], List[int]]): The u-component of the wind.
va (Union[float, int, List[float], List[int]]): The v-component of the wind.
Returns:
float or List[float]: The calculated wind direction.
Raises:
ValueError: If the input types are incorrect or do not match, or if lists of different lengths are provided.
"""

if isinstance(ua, (float, int)) and isinstance(va, (float, int)):
return calculate_wind_direction(ua, va)
elif isinstance(ua, list) and isinstance(va, list):
if not all(isinstance(num, (float, int)) for num in ua + va):
raise ValueError("All elements in both lists must be either float or int.")
if len(ua) != len(va):
raise ValueError("Both lists must be of the same length.")
return [calculate_wind_direction(u, v) for u, v in zip(ua, va)]
else:
raise ValueError("Both arguments must be either both float/int or both lists of float/int.")
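The direction helpers use the meteorological convention, i.e. the bearing the wind blows from: phi = mod(180 + atan2(ua, va) * 180/pi, 360). A quick check:

from barra2_dl.helpers import calculate_wind_direction, wind_components_to_direction

calculate_wind_direction(1.0, 0.0)                      # 270.0 (westerly: wind from the west)
calculate_wind_direction(0.0, 1.0)                      # 180.0 (southerly: wind from the south)
wind_components_to_direction([1.0, 0.0], [0.0, 1.0])    # [270.0, 180.0]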

