From e25e8cb82b7e206aaad7837f2c5613080a2636b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Therese=20Natter=C3=B8y?= <61694854+tnatt@users.noreply.github.com> Date: Wed, 30 Mar 2022 09:19:53 +0200 Subject: [PATCH] Add backwards compatibility for older portables using `ParameterAnalysis` and `PropertyStatistic` (#995) * add backwards compatibility for older portables * get run_mode from WEBVIZ_INSTANCE_INFO --- .../data_loaders/__init__.py | 11 ++ .../models/simulation_timeseries_model.py | 1 + .../_parameter_analysis/parameter_analysis.py | 109 +++++++++++------ .../data_loaders/__init__.py | 11 ++ .../models/simulation_timeseries_model.py | 1 + .../property_statistics.py | 111 +++++++++++------- 6 files changed, 163 insertions(+), 81 deletions(-) create mode 100644 webviz_subsurface/plugins/_parameter_analysis/data_loaders/__init__.py create mode 100644 webviz_subsurface/plugins/_property_statistics/data_loaders/__init__.py diff --git a/webviz_subsurface/plugins/_parameter_analysis/data_loaders/__init__.py b/webviz_subsurface/plugins/_parameter_analysis/data_loaders/__init__.py new file mode 100644 index 000000000..544f333aa --- /dev/null +++ b/webviz_subsurface/plugins/_parameter_analysis/data_loaders/__init__.py @@ -0,0 +1,11 @@ +import pathlib + +import pandas as pd +from webviz_config.common_cache import CACHE +from webviz_config.webviz_store import webvizstore + + +@CACHE.memoize(timeout=CACHE.TIMEOUT) +@webvizstore +def read_csv(csv_file: pathlib.Path) -> pd.DataFrame: + return pd.read_csv(csv_file) diff --git a/webviz_subsurface/plugins/_parameter_analysis/models/simulation_timeseries_model.py b/webviz_subsurface/plugins/_parameter_analysis/models/simulation_timeseries_model.py index ecf691354..2fa795ccf 100644 --- a/webviz_subsurface/plugins/_parameter_analysis/models/simulation_timeseries_model.py +++ b/webviz_subsurface/plugins/_parameter_analysis/models/simulation_timeseries_model.py @@ -27,6 +27,7 @@ def __init__( if column not in dataframe.columns: raise KeyError(f"{column} column is missing from UNSMRY data") + dataframe = dataframe.copy() # ensure correct format of date dataframe["REAL"] = dataframe["REAL"].astype(int) dataframe["DATE"] = pd.to_datetime(dataframe["DATE"]) diff --git a/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py b/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py index 2c0521834..5b4134e3a 100644 --- a/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py +++ b/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py @@ -1,18 +1,25 @@ from pathlib import Path -from typing import Optional, Tuple +from typing import Optional, Tuple, Union import pandas as pd from dash import dcc from webviz_config import WebvizPluginABC, WebvizSettings from webviz_config.deprecation_decorators import deprecated_plugin_arguments +from webviz_config.webviz_instance_info import WEBVIZ_INSTANCE_INFO, WebvizRunMode +from webviz_subsurface._models import ( + EnsembleSetModel, + caching_ensemble_set_model_factory, +) from webviz_subsurface._providers import ( EnsembleSummaryProviderFactory, EnsembleTableProviderFactory, + EnsembleTableProviderSet, Frequency, ) from .controllers import parameter_qc_controller, parameter_response_controller +from .data_loaders import read_csv from .models import ( ParametersModel, ProviderTimeSeriesDataModel, @@ -81,14 +88,18 @@ def __init__( column_keys: Optional[list] = None, drop_constants: bool = True, rel_file_pattern: str = "share/results/unsmry/*.arrow", - csvfile_parameters: Optional[Path] = None, - csvfile_smry: Optional[Path] = None, + csvfile_parameters: Path = None, + csvfile_smry: Path = None, ): super().__init__() self.theme = webviz_settings.theme self.ensembles = ensembles + self.vmodel: Optional[ + Union[SimulationTimeSeriesModel, ProviderTimeSeriesDataModel] + ] = None + run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE table_provider = EnsembleTableProviderFactory.instance() if ensembles is not None: @@ -102,55 +113,77 @@ def __init__( resampling_frequency = Frequency(time_index) provider_factory = EnsembleSummaryProviderFactory.instance() - provider_set = {} - for ens, ens_path in ensemble_paths.items(): - try: - provider_set[ - ens - ] = provider_factory.create_from_arrow_unsmry_presampled( + try: + provider_set = { + ens: provider_factory.create_from_arrow_unsmry_presampled( str(ens_path), rel_file_pattern, resampling_frequency ) - except ValueError as error: - message = ( - f"No arrow files found at {rel_file_pattern} for ensemble {ens}. \n" - "If no arrow files have been generated with `ERT` using `ECL2CSV`, " - "the commandline tool `smry2arrow_batch` can be used to generate arrow " - "files for an ensemble" + for ens, ens_path in ensemble_paths.items() + } + self.vmodel = ProviderTimeSeriesDataModel( + provider_set=provider_set, column_keys=column_keys + ) + parameter_df = create_df_from_table_provider( + table_provider.create_provider_set_from_per_realization_parameter_file( + ensemble_paths ) + ) + except ValueError as error: + message = ( + f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n" + "If no arrow files have been generated with `ERT` using `ECL2CSV`, " + "the commandline tool `smry2arrow_batch` can be used to generate arrow " + "files for an ensemble" + ) + if not run_mode_portable: raise ValueError(message) from error - self.vmodel = ProviderTimeSeriesDataModel( - provider_set=provider_set, column_keys=column_keys - ) - - parameterproviderset = ( - table_provider.create_provider_set_from_per_realization_parameter_file( - ensemble_paths + # NOTE: this part below is to ensure backwards compatibility for portable app's + # created before the arrow support. It should be removed in the future. + emodel: EnsembleSetModel = ( + caching_ensemble_set_model_factory.get_or_create_model( + ensemble_paths=ensemble_paths, + time_index=time_index, + column_keys=column_keys, + ) ) - ) + self.vmodel = SimulationTimeSeriesModel( + dataframe=emodel.get_or_load_smry_cached() + ) + parameter_df = emodel.load_parameters() elif csvfile_parameters is None: raise ValueError("Either ensembles or csvfile_parameters must be specified") else: - parameterproviderset = ( - table_provider.create_provider_set_from_aggregated_csv_file( - csvfile_parameters - ) - ) - if csvfile_smry is not None: - smryprovider = ( + # NOTE: the try/except is for backwards compatibility with existing portable app's. + # It should be removed in the future together with the support of aggregated csv-files + try: + parameter_df = create_df_from_table_provider( table_provider.create_provider_set_from_aggregated_csv_file( - csvfile_smry + csvfile_parameters ) ) - self.vmodel = SimulationTimeSeriesModel( - dataframe=create_df_from_table_provider(smryprovider) - ) - else: - self.vmodel = None + except FileNotFoundError: + if not run_mode_portable: + raise + parameter_df = read_csv(csvfile_parameters) + + if csvfile_smry is not None: + try: + smry_df = create_df_from_table_provider( + table_provider.create_provider_set_from_aggregated_csv_file( + csvfile_smry + ) + ) + except FileNotFoundError: + if not run_mode_portable: + raise + smry_df = read_csv(csvfile_smry) + + self.vmodel = SimulationTimeSeriesModel(dataframe=smry_df) self.pmodel = ParametersModel( - dataframe=create_df_from_table_provider(parameterproviderset), + dataframe=parameter_df, theme=self.theme, drop_constants=drop_constants, ) @@ -177,7 +210,7 @@ def set_callbacks(self, app) -> None: ) -def create_df_from_table_provider(provider) -> pd.DataFrame: +def create_df_from_table_provider(provider: EnsembleTableProviderSet) -> pd.DataFrame: dfs = [] for ens in provider.ensemble_names(): df = provider.ensemble_provider(ens).get_column_data( diff --git a/webviz_subsurface/plugins/_property_statistics/data_loaders/__init__.py b/webviz_subsurface/plugins/_property_statistics/data_loaders/__init__.py new file mode 100644 index 000000000..544f333aa --- /dev/null +++ b/webviz_subsurface/plugins/_property_statistics/data_loaders/__init__.py @@ -0,0 +1,11 @@ +import pathlib + +import pandas as pd +from webviz_config.common_cache import CACHE +from webviz_config.webviz_store import webvizstore + + +@CACHE.memoize(timeout=CACHE.TIMEOUT) +@webvizstore +def read_csv(csv_file: pathlib.Path) -> pd.DataFrame: + return pd.read_csv(csv_file) diff --git a/webviz_subsurface/plugins/_property_statistics/models/simulation_timeseries_model.py b/webviz_subsurface/plugins/_property_statistics/models/simulation_timeseries_model.py index 57485cd29..db853f0e0 100644 --- a/webviz_subsurface/plugins/_property_statistics/models/simulation_timeseries_model.py +++ b/webviz_subsurface/plugins/_property_statistics/models/simulation_timeseries_model.py @@ -24,6 +24,7 @@ def __init__( if column not in dataframe.columns: raise KeyError(f"{column} column is missing from UNSMRY data") + dataframe = dataframe.copy() # ensure correct format of date dataframe["REAL"] = dataframe["REAL"].astype(int) dataframe["DATE"] = pd.to_datetime(dataframe["DATE"]) diff --git a/webviz_subsurface/plugins/_property_statistics/property_statistics.py b/webviz_subsurface/plugins/_property_statistics/property_statistics.py index d51b1ed36..6f6f4a3b7 100644 --- a/webviz_subsurface/plugins/_property_statistics/property_statistics.py +++ b/webviz_subsurface/plugins/_property_statistics/property_statistics.py @@ -5,19 +5,23 @@ from dash import Dash, dcc from webviz_config import WebvizConfigTheme, WebvizPluginABC, WebvizSettings from webviz_config.deprecation_decorators import deprecated_plugin_arguments +from webviz_config.webviz_instance_info import WEBVIZ_INSTANCE_INFO, WebvizRunMode +from webviz_subsurface._models import ( + EnsembleSetModel, + caching_ensemble_set_model_factory, +) from webviz_subsurface._providers import ( EnsembleSummaryProviderFactory, EnsembleTableProviderFactory, - Frequency, -) -from webviz_subsurface._providers.ensemble_table_provider import ( EnsembleTableProviderSet, + Frequency, ) from .controllers.property_delta_controller import property_delta_controller from .controllers.property_qc_controller import property_qc_controller from .controllers.property_response_controller import property_response_controller +from .data_loaders import read_csv from .models import ( PropertyStatisticsModel, ProviderTimeSeriesDataModel, @@ -93,17 +97,17 @@ def __init__( surface_renaming: Optional[dict] = None, time_index: str = "monthly", column_keys: Optional[list] = None, - csvfile_statistics: Optional[Path] = None, - csvfile_smry: Optional[Path] = None, + csvfile_statistics: Path = None, + csvfile_smry: Path = None, ): super().__init__() self.theme: WebvizConfigTheme = webviz_settings.theme self.ensembles = ensembles - self._surface_folders: Union[dict, None] + self._surface_folders: Union[dict, None] = None self._vmodel: Optional[ Union[SimulationTimeSeriesModel, ProviderTimeSeriesDataModel] ] = None - + run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE table_provider = EnsembleTableProviderFactory.instance() if ensembles is not None: @@ -117,32 +121,45 @@ def __init__( resampling_frequency = Frequency(time_index) provider_factory = EnsembleSummaryProviderFactory.instance() - provider_set = {} - for ens, ens_path in ensemble_paths.items(): - try: - provider_set[ - ens - ] = provider_factory.create_from_arrow_unsmry_presampled( + try: + provider_set = { + ens: provider_factory.create_from_arrow_unsmry_presampled( str(ens_path), rel_file_pattern, resampling_frequency ) - except ValueError as error: - message = ( - f"No arrow files found at {rel_file_pattern} for ensemble {ens}. \n" - "If no arrow files have been generated with `ERT` using `ECL2CSV`, " - "the commandline tool `smry2arrow_batch` can be used to generate arrow " - "files for an ensemble" + for ens, ens_path in ensemble_paths.items() + } + self._vmodel = ProviderTimeSeriesDataModel( + provider_set=provider_set, column_keys=column_keys + ) + property_df = create_df_from_table_provider( + table_provider.create_provider_set_from_per_realization_csv_file( + ensemble_paths, statistics_file ) + ) + except ValueError as error: + message = ( + f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n" + "If no arrow files have been generated with `ERT` using `ECL2CSV`, " + "the commandline tool `smry2arrow_batch` can be used to generate arrow " + "files for an ensemble" + ) + if not run_mode_portable: raise ValueError(message) from error - self._vmodel = ProviderTimeSeriesDataModel( - provider_set=provider_set, column_keys=column_keys - ) - - propertyproviderset = ( - table_provider.create_provider_set_from_per_realization_csv_file( - ensemble_paths, statistics_file + # NOTE: this part below is to ensure backwards compatibility for portable app's + # created before the arrow support. It should be removed in the future. + emodel: EnsembleSetModel = ( + caching_ensemble_set_model_factory.get_or_create_model( + ensemble_paths=ensemble_paths, + time_index=time_index, + column_keys=column_keys, + ) ) - ) + self._vmodel = SimulationTimeSeriesModel( + dataframe=emodel.get_or_load_smry_cached() + ) + property_df = emodel.load_csv(csv_file=Path(statistics_file)) + self._surface_folders = { ens: Path(ens_path.split("realization")[0]) / "share/results/maps" / ens for ens, ens_path in ensemble_paths.items() @@ -153,26 +170,34 @@ def __init__( raise ValueError( "If not 'ensembles', then csvfile_statistics must be provided" ) - propertyproviderset = ( - table_provider.create_provider_set_from_aggregated_csv_file( - csvfile_statistics - ) - ) - if csvfile_smry is not None: - smryprovider = ( + # NOTE: the try/except is for backwards compatibility with existing portable app's. + # It should be removed in the future together with the support of aggregated csv-files + try: + property_df = create_df_from_table_provider( table_provider.create_provider_set_from_aggregated_csv_file( - csvfile_smry + csvfile_statistics ) ) - self._vmodel = SimulationTimeSeriesModel( - dataframe=create_df_from_table_provider(smryprovider) - ) - self._surface_folders = None + except FileNotFoundError: + if not run_mode_portable: + raise + property_df = read_csv(csvfile_statistics) - self._pmodel = PropertyStatisticsModel( - dataframe=create_df_from_table_provider(propertyproviderset), - theme=self.theme, - ) + if csvfile_smry is not None: + try: + smry_df = create_df_from_table_provider( + table_provider.create_provider_set_from_aggregated_csv_file( + csvfile_smry + ) + ) + except FileNotFoundError: + if not run_mode_portable: + raise + smry_df = read_csv(csvfile_smry) + + self._vmodel = SimulationTimeSeriesModel(dataframe=smry_df) + + self._pmodel = PropertyStatisticsModel(dataframe=property_df, theme=self.theme) self._surface_renaming = surface_renaming if surface_renaming else {} self._surface_table = generate_surface_table(