diff --git a/map2loop/config.py b/map2loop/config.py index d4811d97..48d017d3 100644 --- a/map2loop/config.py +++ b/map2loop/config.py @@ -93,13 +93,19 @@ def to_dict(self): } @beartype.beartype - def update_from_dictionary(self, dictionary: dict, lower: bool = False): + def update_from_dictionary(self, dictionary: dict, lower: bool = True): """ Update the config dictionary from a provided dict Args: dictionary (dict): The dictionary to update from """ + # make sure dictionary doesn't contain legacy keys + self.check_for_legacy_keys(dictionary) + + # make sure it has the minimum requirements + self.validate_config_dictionary(dictionary) + if "structure" in dictionary: self.structure_config.update(dictionary["structure"]) for key in dictionary["structure"].keys(): @@ -108,6 +114,7 @@ def update_from_dictionary(self, dictionary: dict, lower: bool = False): f"Config dictionary structure segment contained {key} which is not used" ) dictionary.pop("structure") + if "geology" in dictionary: self.geology_config.update(dictionary["geology"]) for key in dictionary["geology"].keys(): @@ -135,82 +142,19 @@ def update_from_dictionary(self, dictionary: dict, lower: bool = False): if len(dictionary): logger.warning(f"Unused keys from config format {list(dictionary.keys())}") - @beartype.beartype - def update_from_legacy_file(self, file_map: dict, lower: bool = False): - """ - Update the config dictionary from the provided old version dictionary - - Args: - file_map (dict): The old version dictionary to update from - """ - - code_mapping = { - "otype": (self.structure_config, "orientation_type"), - "dd": (self.structure_config, "dipdir_column"), - "d": (self.structure_config, "dip_column"), - "sf": (self.structure_config, "description_column"), - "bedding": (self.structure_config, "bedding_text"), - "bo": (self.structure_config, "overturned_column"), - "btype": (self.structure_config, "overturned_text"), - "gi": (self.structure_config, "objectid_column"), - "c": 
(self.geology_config, "unitname_column"), - "u": (self.geology_config, "alt_unitname_column"), - "g": (self.geology_config, "group_column"), - "g2": (self.geology_config, "supergroup_column"), - "ds": (self.geology_config, "description_column"), - "min": (self.geology_config, "minage_column"), - "max": (self.geology_config, "maxage_column"), - "r1": (self.geology_config, "rocktype_column"), - "r2": (self.geology_config, "alt_rocktype_column"), - "sill": (self.geology_config, "sill_text"), - "intrusive": (self.geology_config, "intrusive_text"), - "volcanic": (self.geology_config, "volcanic_text"), - "f": (self.fault_config, "structtype_column"), - "fault": (self.fault_config, "fault_text"), - "fdipnull": (self.fault_config, "dip_null_value"), - "fdipdip_flag": (self.fault_config, "dipdir_flag"), - "fdipdir": (self.fault_config, "dipdir_column"), - "fdip": (self.fault_config, "dip_column"), - "fdipest": (self.fault_config, "dipestimate_column"), - "fdipest_vals": (self.fault_config, "dipestimate_text"), - "n": (self.fault_config, "name_column"), - "ff": (self.fold_config, "structtype_column"), - "fold": (self.fold_config, "fold_text"), - "t": (self.fold_config, "description_column"), - "syn": (self.fold_config, "synform_text"), - } - for code in code_mapping: - if code in file_map: - if lower is True: - file_map[code] = str(file_map[code]).lower() - code_mapping[code][0][code_mapping[code][1]] = file_map[code] - file_map.pop(code) - - if "o" in file_map: - self.structure_config["objectid_column"] = file_map["o"] - self.fault_config["objectid_column"] = file_map["o"] - self.fold_config["objectid_column"] = file_map["o"] - file_map.pop("o") - - if len(file_map) > 0: - logger.warning(f"Unused keys from legacy format {list(file_map.keys())}") @beartype.beartype def update_from_file( - self, filename: Union[pathlib.Path, str], legacy_format: bool = False, lower: bool = False + self, filename: Union[pathlib.Path, str], lower: bool = False ): """ Update the config 
dictionary from the provided json filename or url Args: filename (Union[pathlib.Path, str]): Filename or URL of the JSON config file - legacy_format (bool, optional): Whether the JSON is an old version. Defaults to False. lower (bool, optional): convert keys to lowercase. Defaults to False. """ - if legacy_format: - func = self.update_from_legacy_file - else: - func = self.update_from_dictionary + func = self.update_from_dictionary try: filename = str(filename) @@ -269,7 +213,60 @@ def update_from_file( err_string += "Please check the file is accessible online and then\n" else: err_string += "Please check the file exists and is accessible then\n" - if not legacy_format: - err_string += "Also check if this is a legacy config file and add clut_file_legacy=True to the Project function\n" err_string += "Check the contents for mismatched quotes or brackets!" raise Exception(err_string) + + @beartype.beartype + def validate_config_dictionary(self, config_dict: dict) -> None: + """ + Validate the structure and keys of the configuration dictionary. + + Args: + config_dict (dict): The config dictionary to validate. + + Raises: + ValueError: If the dictionary does not meet the minimum requirements for map2loop. + """ + required_keys = { + "structure": {"dipdir_column", "dip_column"}, + "geology": {"unitname_column", "alt_unitname_column"}, + } + + for section, keys in required_keys.items(): + if section not in config_dict: + logger.error(f"Missing required section '{section}' in config dictionary.") + raise ValueError(f"Missing required section '{section}' in config dictionary.") + + for key in keys: + if key not in config_dict[section]: + logger.error( + f"Missing required key '{key}' for '{section}' section of the config dictionary." + ) + raise ValueError( + f"Missing required key '{key}' for '{section}' section of the config dictionary."
+ ) + + @beartype.beartype + def check_for_legacy_keys(self, config_dict: dict) -> None: + + legacy_keys = { + "otype", "dd", "d", "sf", "bedding", "bo", "btype", "gi", "c", "u", + "g", "g2", "ds", "min", "max", "r1", "r2", "sill", "intrusive", "volcanic", + "f", "fdipnull", "fdipdip_flag", "fdipdir", "fdip", "fdipest", + "fdipest_vals", "n", "ff", "t", "syn" + } + + # Recursively search for keys in the dictionary + def check_keys(d: dict, parent_key=""): + for key, value in d.items(): + if key in legacy_keys: + logger.error( + f"Legacy key found in config - '{key}' at '{parent_key + key}'. Please use the new config format. Use map2loop.utils.update_from_legacy_file to convert between the formats if needed" + ) + raise ValueError( + f"Legacy key found in config - '{key}' at '{parent_key + key}'. Please use the new config format. Use map2loop.utils.update_from_legacy_file to convert between the formats if needed" + ) + if isinstance(value, dict): + check_keys(value, parent_key=f"{parent_key}{key}.") + + check_keys(config_dict) \ No newline at end of file diff --git a/map2loop/mapdata.py b/map2loop/mapdata.py index 47562dcc..c1c8b653 100644 --- a/map2loop/mapdata.py +++ b/map2loop/mapdata.py @@ -255,7 +255,7 @@ def get_filename(self, datatype: Datatype): @beartype.beartype def set_config_filename( - self, filename: Union[pathlib.Path, str], legacy_format: bool = False, lower: bool = False + self, filename: Union[pathlib.Path, str], lower: bool = False ): """ Set the config filename and update the config structure @@ -263,12 +263,13 @@ def set_config_filename( Args: filename (str): The filename of the config file - legacy_format (bool, optional): - Whether the file is in m2lv2 form. Defaults to False. + lower (bool, optional): + Flag to convert the config file to lowercase. Defaults to False. 
""" logger.info('Setting config filename to {filename}') - self.config_filename = filename - self.config.update_from_file(filename, legacy_format=legacy_format, lower=lower) + + self.config.update_from_file(filename, lower=lower) + logger.info(f"Config is: {self.config.to_dict()}") def get_config_filename(self): @@ -399,7 +400,7 @@ def set_filenames_from_australian_state(self, state: str): else: self.set_config_filename( - AustraliaStateUrls.aus_config_urls[state], legacy_format=False, lower=lower + AustraliaStateUrls.aus_config_urls[state], lower=lower ) self.set_colour_filename(AustraliaStateUrls.aus_clut_urls[state]) else: diff --git a/map2loop/project.py b/map2loop/project.py index 34359059..39aac197 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -70,7 +70,6 @@ def __init__( config_filename: Union[pathlib.Path, str] = "", config_dictionary: dict = {}, clut_filename: Union[pathlib.Path, str] = "", - clut_file_legacy: bool = False, save_pre_checked_map_data: bool = False, loop_project_filename: str = "", overwrite_loopprojectfile: bool = False, @@ -108,8 +107,6 @@ def __init__( A dictionary version of the configuration file. Defaults to {}. clut_filename (str, optional): The filename of the colour look up table to use. Defaults to "". - clut_file_legacy (bool, optional): - A flag to indicate if the clut file is in the legacy format. Defaults to False. save_pre_checked_map_data (bool, optional): A flag to save all map data to file before use. Defaults to False. 
loop_project_filename (str, optional): @@ -147,6 +144,7 @@ def __init__( self.fold_samples = pandas.DataFrame(columns=["ID", "X", "Y", "Z", "featureId"]) self.geology_samples = pandas.DataFrame(columns=["ID", "X", "Y", "Z", "featureId"]) + # Check for alternate config filenames in kwargs if "metadata_filename" in kwargs and config_filename == "": config_filename = kwargs["metadata_filename"] @@ -203,19 +201,18 @@ def __init__( self.map_data.set_filename(Datatype.DTM, dtm_filename) if fault_orientation_filename != "": self.map_data.set_filename(Datatype.FAULT_ORIENTATION, fault_orientation_filename) - + if config_filename != "": - if clut_file_legacy: - logger.warning( - "DEPRECATION: Legacy files are deprecated and their use will be removed in v3.2" - ) - - self.map_data.set_config_filename(config_filename, legacy_format=clut_file_legacy) + self.map_data.set_config_filename(config_filename) if config_dictionary != {}: self.map_data.config.update_from_dictionary(config_dictionary) + if clut_filename != "": self.map_data.set_colour_filename(clut_filename) + + + # Load all data (both shape and raster) self.map_data.load_all_map_data() diff --git a/map2loop/sorter.py b/map2loop/sorter.py index cf444de6..42da4617 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -118,8 +118,8 @@ def sort( class SorterUseHint(SorterUseNetworkX): def __init__(self): - print( - "SorterUseHint is deprecated and will be removed in map2loop v3.2. Use SorterUseNetworkX instead" + logger.info( + "SorterUseHint is deprecated in v3.2. 
Use SorterUseNetworkX instead" ) super().__init__() diff --git a/map2loop/utils.py b/map2loop/utils.py index e00c0f91..a94f29d4 100644 --- a/map2loop/utils.py +++ b/map2loop/utils.py @@ -5,6 +5,11 @@ import beartype from typing import Union, Optional, Dict import pandas +import re +import json + +from .logging import getLogger +logger = getLogger(__name__) @beartype.beartype @@ -401,3 +406,115 @@ def calculate_minimum_fault_length( # Return the square root of the threshold area as the minimum fault length return threshold_area**0.5 + + +def preprocess_hjson_to_json(hjson_content): + # Remove comments + hjson_content = re.sub(r'#.*', '', hjson_content) + hjson_content = re.sub(r'//.*', '', hjson_content) + # Replace single quotes with double quotes + hjson_content = re.sub(r"(? dict: + try: + # Read the file + with open(file_path, "r", encoding="utf-8") as file: + hjson_content = file.read() + if not hjson_content.strip(): + raise ValueError("The HJSON file is empty.") + # Preprocess HJSON to JSON + preprocessed_content = preprocess_hjson_to_json(hjson_content) + # Parse JSON + return json.loads(preprocessed_content) + except FileNotFoundError as e: + raise FileNotFoundError(f"HJSON file not found: {file_path}") from e + except json.JSONDecodeError as e: + raise ValueError(f"Failed to decode preprocessed HJSON as JSON: {e}") from e + + +def update_from_legacy_file( + filename: str, + json_save_path: Optional[str] = None, + lower: bool = False +) -> Optional[Dict[str, Dict]]: + """ + Update the config dictionary from the provided old version dictionary + Args: + file_map (dict): The old version dictionary to update from + """ + # only import config if needed + from .config import Config + file_map = Config() + + code_mapping = { + "otype": (file_map.structure_config, "orientation_type"), + "dd": (file_map.structure_config, "dipdir_column"), + "d": (file_map.structure_config, "dip_column"), + "sf": (file_map.structure_config, "description_column"), + "bedding": 
(file_map.structure_config, "bedding_text"), + "bo": (file_map.structure_config, "overturned_column"), + "btype": (file_map.structure_config, "overturned_text"), + "gi": (file_map.structure_config, "objectid_column"), + "c": (file_map.geology_config, "unitname_column"), + "u": (file_map.geology_config, "alt_unitname_column"), + "g": (file_map.geology_config, "group_column"), + "g2": (file_map.geology_config, "supergroup_column"), + "ds": (file_map.geology_config, "description_column"), + "min": (file_map.geology_config, "minage_column"), + "max": (file_map.geology_config, "maxage_column"), + "r1": (file_map.geology_config, "rocktype_column"), + "r2": (file_map.geology_config, "alt_rocktype_column"), + "sill": (file_map.geology_config, "sill_text"), + "intrusive": (file_map.geology_config, "intrusive_text"), + "volcanic": (file_map.geology_config, "volcanic_text"), + "f": (file_map.fault_config, "structtype_column"), + "fault": (file_map.fault_config, "fault_text"), + "fdipnull": (file_map.fault_config, "dip_null_value"), + "fdipdip_flag": (file_map.fault_config, "dipdir_flag"), + "fdipdir": (file_map.fault_config, "dipdir_column"), + "fdip": (file_map.fault_config, "dip_column"), + "fdipest": (file_map.fault_config, "dipestimate_column"), + "fdipest_vals": (file_map.fault_config, "dipestimate_text"), + "n": (file_map.fault_config, "name_column"), + "ff": (file_map.fold_config, "structtype_column"), + "fold": (file_map.fold_config, "fold_text"), + "t": (file_map.fold_config, "description_column"), + "syn": (file_map.fold_config, "synform_text"), + } + # try to read the file: + try: + parsed_data = read_hjson_with_json(filename) + except Exception as e: + logger.error(f"Error reading file {filename}: {e}") + return + # map the keys + file_map = file_map.to_dict() + for legacy_key, new_mapping in code_mapping.items(): + if legacy_key in parsed_data: + section, new_key = new_mapping + value = parsed_data[legacy_key] + if lower and isinstance(value, str): + value =
value.lower() + section[new_key] = value + + if "o" in parsed_data: + object_id_value = parsed_data["o"] + if lower and isinstance(object_id_value, str): + object_id_value = object_id_value.lower() + file_map['structure']["objectid_column"] = object_id_value + file_map['geology']["objectid_column"] = object_id_value + file_map['fold']["objectid_column"] = object_id_value + + if json_save_path is not None: + with open(json_save_path, "w") as f: + json.dump(parsed_data, f, indent=4) + + return file_map \ No newline at end of file diff --git a/tests/project/test_plot_hamersley.py b/tests/project/test_plot_hamersley.py index 7be0bb55..07393f27 100644 --- a/tests/project/test_plot_hamersley.py +++ b/tests/project/test_plot_hamersley.py @@ -23,7 +23,6 @@ def create_project(state_data="WA", projection="EPSG:28350"): use_australian_state_data=state_data, working_projection=projection, bounding_box=bbox_3d, - clut_file_legacy=False, verbose_level=VerboseLevel.NONE, loop_project_filename=loop_project_filename, overwrite_loopprojectfile=True,