diff --git a/CHANGELOG.md b/CHANGELOG.md index fcc4862ee..a3ff8f998 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased +### Fixed +- [#505](https://github.com/equinor/webviz-subsurface/pull/505) - Fixed recent performance regression issue for loading of UNSMRY data. Loading times when multiple plugins are using the same data are now significantly reduced. Note that all UNSMRY vectors are now stored in portable apps, independent of the choice of column_keys in individual plugins. + ## [0.1.5] - 2020-11-26 ### Added - [#478](https://github.com/equinor/webviz-subsurface/pull/478) - New plugin `AssistedHistoryMatchingAnalysis`. This dashboard helps to analyze the update step performed during assisted history match. E.g. which observations are causing an update in a specific parameter. Based on Kolmogorov–Smirnov. 
diff --git a/tests/conftest.py b/tests/conftest.py index 5fcdd6166..a9729d4b7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,8 @@ import pathlib import pytest +import dash +from webviz_config.common_cache import CACHE def pytest_addoption(parser): @@ -15,3 +17,13 @@ def pytest_addoption(parser): @pytest.fixture def testdata_folder(request): return request.config.getoption("--testdata-folder") + + +@pytest.fixture() +def app(): + dash_app = dash.Dash(__name__) + dash_app.css.config.serve_locally = True + dash_app.scripts.config.serve_locally = True + dash_app.config.suppress_callback_exceptions = True + CACHE.init_app(dash_app.server) + yield dash_app diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit_tests/model_tests/__init__.py b/tests/unit_tests/model_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit_tests/model_tests/test_ensemble_model.py b/tests/unit_tests/model_tests/test_ensemble_model.py new file mode 100644 index 000000000..22986851b --- /dev/null +++ b/tests/unit_tests/model_tests/test_ensemble_model.py @@ -0,0 +1,187 @@ +from pathlib import Path +import datetime + +import pytest +import numpy as np + +from webviz_subsurface._models.ensemble_model import EnsembleModel + + +@pytest.mark.usefixtures("app") +def test_ensemble_set_init(testdata_folder): + + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + assert emodel.ens_folder == {"iter-0": f"{testdata_folder}/reek_history_match/"} + assert emodel.webviz_store == [] + + +@pytest.mark.usefixtures("app") +def test_bad_ensemble_path(): + emodel = EnsembleModel(ensemble_name="iter-0", ensemble_path="some_path") + with pytest.raises(ValueError) as exception: + emodel.load_ensemble() + assert ( + exception.value.args[0] + == "No realizations found for 
ensemble iter-0, located at 'some_path'. Aborting..." + ) + + +@pytest.mark.usefixtures("app") +def test_smry_load(testdata_folder): + + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + smry = emodel.load_smry() + assert len(smry.columns) == 475 + assert len(smry["DATE"].unique()) == 480 + + +@pytest.mark.usefixtures("app") +def test_smry_load_filter_and_dtypes(testdata_folder): + + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + smry = emodel.load_smry(column_keys=["FO*"], time_index="yearly") + assert set(smry.columns) == set( + [ + "DATE", + "REAL", + "FOPRF", + "FOPRS", + "FOPRH", + "FOPTH", + "FOPR", + "FOPTS", + "FOPTF", + "FOPT", + "FOIP", + "FOPP", + ] + ) + assert set(smry["DATE"].unique()) == set( + [ + datetime.date(2000, 1, 1), + datetime.date(2001, 1, 1), + datetime.date(2002, 1, 1), + datetime.date(2003, 1, 1), + datetime.date(2004, 1, 1), + ] + ) + assert smry["DATE"].dtype == np.dtype("O") + assert smry["REAL"].dtype == np.dtype("int64") + assert all( + np.issubdtype(dtype, np.number) + for dtype in smry.drop(["REAL", "DATE"], axis=1).dtypes + ) + smry = emodel.load_smry( + column_keys=["F[OGW]P?", "WWCT:OP*", "FOIP"], time_index="yearly" + ) + assert set(smry.columns) == set( + [ + "REAL", + "DATE", + "FGPP", + "FGPR", + "FGPT", + "FOPP", + "FOPR", + "FOPT", + "FWPP", + "FWPR", + "FWPT", + "WWCT:OP_1", + "WWCT:OP_2", + "WWCT:OP_3", + "WWCT:OP_4", + "WWCT:OP_5", + "FOIP", + ] + ) + + +@pytest.mark.usefixtures("app") +def test_smry_meta(testdata_folder): + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + smeta = emodel.load_smry_meta() + assert set(smeta.columns) == set( + ["unit", "is_total", "is_rate", "is_historical", "keyword", "wgname", 
"get_num"] + ) + assert len(smeta) == 473 + assert "FOPT" in smeta.index + + +@pytest.mark.usefixtures("app") +def test_parameter_loading(testdata_folder): + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + parameters = emodel.load_parameters() + assert "REAL" in parameters.columns + assert parameters["REAL"].dtype == np.dtype("int64") + assert len(parameters.columns) == 27 + + +@pytest.mark.usefixtures("app") +def test_load_csv(testdata_folder): + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + dframe = emodel.load_csv(Path("share") / "results" / "tables" / "rft.csv") + assert "REAL" in dframe.columns + assert dframe["REAL"].dtype == np.dtype("int64") + assert len(dframe.columns) == 14 + + +@pytest.mark.usefixtures("app") +def test_webviz_store(testdata_folder): + emodel = EnsembleModel( + ensemble_name="iter-0", + ensemble_path=Path(testdata_folder) + / "reek_history_match" + / "realization-*" + / "iter-0", + ) + emodel.load_parameters() + assert len(emodel.webviz_store) == 1 + emodel.load_smry() + assert len(emodel.webviz_store) == 2 + emodel.load_smry(column_keys=["FOIP"]) + assert len(emodel.webviz_store) == 3 + emodel.load_smry(time_index="raw") + assert len(emodel.webviz_store) == 4 + emodel.load_smry_meta() + assert len(emodel.webviz_store) == 5 + emodel.load_smry_meta(column_keys=["R*", "GW?T*"]) + assert len(emodel.webviz_store) == 6 + emodel.load_csv(Path("share") / "results" / "tables" / "rft.csv") + assert len(emodel.webviz_store) == 7 diff --git a/tests/unit_tests/model_tests/test_ensemble_set_model.py b/tests/unit_tests/model_tests/test_ensemble_set_model.py new file mode 100644 index 000000000..32c7da386 --- /dev/null +++ b/tests/unit_tests/model_tests/test_ensemble_set_model.py @@ -0,0 +1,142 @@ +from pathlib import Path + +import pytest 
+import numpy as np + +from webviz_subsurface._models.ensemble_set_model import EnsembleSetModel + + +@pytest.mark.usefixtures("app") +def test_single_ensemble(testdata_folder): + + emodel = EnsembleSetModel( + ensemble_paths={ + "iter-0": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-0" + ) + ) + } + ) + assert emodel.ens_folders == { + "iter-0": Path("webviz-subsurface-testdata") / "reek_history_match" + } + assert len(emodel.ensembles) == 1 + smry = emodel.load_smry() + assert len(smry.columns) == 476 + assert len(smry["DATE"].unique()) == 480 + assert smry["ENSEMBLE"].unique() == ["iter-0"] + assert smry["ENSEMBLE"].dtype == np.dtype("O") + assert all( + np.issubdtype(dtype, np.number) + for dtype in smry.drop(["REAL", "ENSEMBLE", "DATE"], axis=1).dtypes + ) + + parameters = emodel.load_parameters() + assert all(col in parameters.columns for col in ["ENSEMBLE", "REAL"]) + assert parameters["ENSEMBLE"].dtype == np.dtype("O") + assert parameters["REAL"].dtype == np.dtype("int64") + assert len(parameters.columns) == 28 + + +@pytest.mark.usefixtures("app") +def test_smry_load_multiple_ensembles(testdata_folder): + + emodel = EnsembleSetModel( + ensemble_paths={ + "iter-0": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-0" + ) + ), + "iter-1": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-1" + ) + ), + "iter-2": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-2" + ) + ), + "iter-3": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-3" + ) + ), + } + ) + smry = emodel.load_smry() + assert len(smry.columns) == 476 + assert len(smry["DATE"].unique()) == 1141 + assert set(smry["ENSEMBLE"].unique()) == set( + ["iter-0", "iter-1", "iter-2", "iter-3"] + ) + assert smry["ENSEMBLE"].dtype == np.dtype("O") + # assert smry["DATE"].dtype == np.dtype("O") # Fails due to wrong input data? 
+ assert smry["REAL"].dtype == np.dtype("int64") + assert all( + np.issubdtype(dtype, np.number) + for dtype in smry.drop(["REAL", "ENSEMBLE", "DATE"], axis=1).dtypes + ) + smeta = emodel.load_smry_meta() + assert set(smeta.columns) == set( + ["unit", "is_total", "is_rate", "is_historical", "keyword", "wgname", "get_num"] + ) + assert len(smeta) == 473 + assert "FOPT" in smeta.index + + parameters = emodel.load_parameters() + assert all(col in parameters.columns for col in ["ENSEMBLE", "REAL"]) + assert parameters["ENSEMBLE"].dtype == np.dtype("O") + assert parameters["REAL"].dtype == np.dtype("int64") + assert len(parameters.columns) == 28 + + dframe = emodel.load_csv(Path("share") / "results" / "tables" / "rft.csv") + assert "ENSEMBLE" in dframe.columns + assert dframe["ENSEMBLE"].dtype == np.dtype("O") + assert len(dframe["ENSEMBLE"].unique()) == 4 + assert len(dframe.columns) == 15 + + with pytest.raises(KeyError) as exc: + emodel.load_csv("some_path") + assert ( + exc.value.args[0] + == "No data found for load_csv with arguments: {'csv_file': 'some_path'}" + ) + + +@pytest.mark.usefixtures("app") +def test_webvizstore(testdata_folder): + emodel = EnsembleSetModel( + ensemble_paths={ + "iter-0": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-0" + ) + ), + "iter-1": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-1" + ) + ), + "iter-2": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-2" + ) + ), + "iter-3": str( + Path( + testdata_folder / "reek_history_match" / "realization-*" / "iter-3" + ) + ), + } + ) + emodel.load_parameters() + assert len(emodel.webvizstore) == 4 + emodel.load_smry() + assert len(emodel.webvizstore) == 8 + emodel.load_smry_meta() + assert len(emodel.webvizstore) == 12 + emodel.load_csv(Path("share") / "results" / "tables" / "rft.csv") + assert len(emodel.webvizstore) == 16 diff --git a/webviz_subsurface/_models/__init__.py 
b/webviz_subsurface/_models/__init__.py new file mode 100644 index 000000000..3d460dece --- /dev/null +++ b/webviz_subsurface/_models/__init__.py @@ -0,0 +1,2 @@ +from .ensemble_model import EnsembleModel +from .ensemble_set_model import EnsembleSetModel diff --git a/webviz_subsurface/_models/ensemble_model.py b/webviz_subsurface/_models/ensemble_model.py new file mode 100644 index 000000000..60a0799ad --- /dev/null +++ b/webviz_subsurface/_models/ensemble_model.py @@ -0,0 +1,151 @@ +import fnmatch +import pathlib +import re +from typing import Union, Optional, List, Tuple, Callable, Dict + +import pandas as pd +from fmu.ensemble import ScratchEnsemble +from webviz_config.common_cache import CACHE + + +from webviz_config.webviz_store import webvizstore + + +class EnsembleModel: + """Class to load data from a scratchensemble using fmu.ensemble""" + + def __init__( + self, + ensemble_name: str, + ensemble_path: Union[str, pathlib.Path], + filter_file: Union[str, None] = "OK", + ) -> None: + self.ensemble_name = ensemble_name + self.ensemble_path = str(ensemble_path) + self.filter_file = filter_file + self._webviz_store: List = [] + + def __repr__(self) -> str: + return f"EnsembleModel: {self.ensemble_name, self.ensemble_path}" + + @property + def ens_folder(self) -> dict: + """Get root folder for ensemble""" + return {self.ensemble_name: self.ensemble_path.split("realization")[0]} + + @CACHE.memoize(timeout=CACHE.TIMEOUT) + def load_ensemble(self) -> ScratchEnsemble: + ensemble = ( + ScratchEnsemble(self.ensemble_name, self.ensemble_path) + if self.filter_file is None + else ScratchEnsemble(self.ensemble_name, self.ensemble_path).filter( + self.filter_file + ) + ) + if ensemble.realizations == {}: + raise ValueError( + f"No realizations found for ensemble {self.ensemble_name}, " + f"located at '{self.ensemble_path}'. " + "Aborting..." 
+ ) + return ensemble + + def load_parameters(self) -> pd.DataFrame: + self._webviz_store.append( + ( + self._load_parameters, + [{"self": self}], + ) + ) + return self._load_parameters() + + def load_smry( + self, + time_index: Optional[Union[list, str]] = None, + column_keys: Optional[list] = None, + ) -> pd.DataFrame: + self._webviz_store.append( + ( + self._load_smry, + [{"self": self, "time_index": time_index, "column_keys": None}], + ) + ) + + if column_keys is None: + return self._load_smry(time_index=time_index) + df = self._load_smry(time_index=time_index) + return df[ + df.columns[_match_column_keys(df_index=df.columns, column_keys=column_keys)] + ] + + def load_smry_meta( + self, + column_keys: Optional[list] = None, + ) -> pd.DataFrame: + """Finds metadata for the summary vectors in the ensemble.""" + self._webviz_store.append( + (self._load_smry_meta, [{"self": self, "column_keys": None}]) + ) + if column_keys is None: + return self._load_smry_meta() + df = self._load_smry_meta() + return df[_match_column_keys(df_index=df.index, column_keys=column_keys)] + + def load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: + self._webviz_store.append( + ( + self._load_csv, + [{"self": self, "csv_file": csv_file}], + ) + ) + return self._load_csv(csv_file=csv_file) + + @CACHE.memoize(timeout=CACHE.TIMEOUT) + @webvizstore + def _load_parameters(self) -> pd.DataFrame: + return self.load_ensemble().parameters + + @CACHE.memoize(timeout=CACHE.TIMEOUT) + @webvizstore + def _load_smry( + self, + time_index: Optional[Union[list, str]] = None, + column_keys: Optional[list] = None, + ) -> pd.DataFrame: + return self.load_ensemble().get_smry( + time_index=time_index, column_keys=column_keys + ) + + @CACHE.memoize(timeout=CACHE.TIMEOUT) + @webvizstore + def _load_smry_meta( + self, + column_keys: Optional[list] = None, + ) -> pd.DataFrame: + """Finds metadata for the summary vectors in the ensemble.""" + return pd.DataFrame( + 
self.load_ensemble().get_smry_meta(column_keys=column_keys) + ).T + + @CACHE.memoize(timeout=CACHE.TIMEOUT) + @webvizstore + def _load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: + return self.load_ensemble().load_csv(str(csv_file)) + + @property + def webviz_store(self) -> List[Tuple[Callable, List[Dict]]]: + return self._webviz_store + + +def _match_column_keys( + df_index: pd.core.indexes.base.Index, column_keys: List[str] +) -> pd.core.indexes.base.Index: + """Matches patterns in column_keys with the columns in df_columns, and adds 'DATE' and + 'REAL' to the requested column patterns. + """ + all_columns_keys = ["DATE", "REAL"] + all_columns_keys.extend(column_keys) + regex = re.compile( + "|".join([fnmatch.translate(column_key) for column_key in all_columns_keys]) + ) + return df_index.map(lambda column: bool(regex.fullmatch(column))) diff --git a/webviz_subsurface/_models/ensemble_set_model.py b/webviz_subsurface/_models/ensemble_set_model.py new file mode 100644 index 000000000..d75b3749e --- /dev/null +++ b/webviz_subsurface/_models/ensemble_set_model.py @@ -0,0 +1,91 @@ +from typing import Union, Optional, List, Callable, Tuple, Dict, Any +import pathlib + +import pandas as pd + +from .ensemble_model import EnsembleModel + + +class EnsembleSetModel: + """Class to load and manipulate ensemble sets from given paths to + ensembles on disk""" + + def __init__( + self, + ensemble_paths: dict, + ensemble_set_name: str = "EnsembleSet", + filter_file: Union[str, None] = "OK", + ) -> None: + self.ensemble_paths = ensemble_paths + self.ensemble_set_name = ensemble_set_name + self.filter_file = filter_file + self._webvizstore: List = [] + self.ensembles = [ + EnsembleModel(ens_name, ens_path, filter_file=self.filter_file) + for ens_name, ens_path in self.ensemble_paths.items() + ] + + def __repr__(self) -> str: + return f"EnsembleSetModel: {self.ensemble_paths}" + + @property + def ens_folders(self) -> dict: + """Get root folders for ensemble set""" + return { 
+ ens: pathlib.Path(ens_path.split("realization")[0]) + for ens, ens_path in self.ensemble_paths.items() + } + + def _get_ensembles_data(self, func: str, **kwargs: Any) -> pd.DataFrame: + """Runs the provided function for each ensemble and concats dataframes""" + dfs = [] + for ensemble in self.ensembles: + try: + dframe = getattr(ensemble, func)(**kwargs) + dframe.insert(0, "ENSEMBLE", ensemble.ensemble_name) + dfs.append(dframe) + except (KeyError, ValueError): + # Happens if an ensemble is missing some data + # Warning has already been issued at initialization + pass + if dfs: + return pd.concat(dfs, sort=False) + raise KeyError(f"No data found for {func} with arguments: {kwargs}") + + def load_parameters(self) -> pd.DataFrame: + return self._get_ensembles_data("load_parameters") + + def load_smry( + self, + time_index: Optional[Union[list, str]] = None, + column_keys: Optional[list] = None, + ) -> pd.DataFrame: + return self._get_ensembles_data( + "load_smry", time_index=time_index, column_keys=column_keys + ) + + def load_smry_meta( + self, + column_keys: Optional[list] = None, + ) -> pd.DataFrame: + """Finds metadata for the summary vectors in the ensemble set. + Note that we assume the same units for all ensembles. 
+ (meaning that we update/overwrite when checking the next ensemble) + """ + + smry_meta: dict = {} + for ensemble in self.ensembles: + smry_meta.update( + ensemble.load_smry_meta(column_keys=column_keys).T.to_dict() + ) + return pd.DataFrame(smry_meta).transpose() + + def load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: + return self._get_ensembles_data("load_csv", csv_file=csv_file) + + @property + def webvizstore(self) -> List[Tuple[Callable, List[Dict]]]: + store_functions = [] + for ensemble in self.ensembles: + store_functions.extend(ensemble.webviz_store) + return store_functions diff --git a/webviz_subsurface/plugins/_bhp_qc.py b/webviz_subsurface/plugins/_bhp_qc.py index fd4a2d824..dfaa14bc1 100644 --- a/webviz_subsurface/plugins/_bhp_qc.py +++ b/webviz_subsurface/plugins/_bhp_qc.py @@ -9,7 +9,7 @@ from webviz_config.common_cache import CACHE from webviz_config import WebvizPluginABC -from .._datainput.fmu_input import load_smry +from webviz_subsurface._models import EnsembleSetModel from .._utils.unique_theming import unique_colors @@ -39,21 +39,19 @@ def __init__( wells: Optional[List[str]] = None, ): super().__init__() - self.ens_paths = { - ensemble: app.webviz_settings["shared_settings"]["scratch_ensembles"][ - ensemble - ] - for ensemble in ensembles - } if wells is None: self.column_keys = ["WBHP:*"] else: self.column_keys = [f"WBHP:{well}" for well in wells] - self.smry = load_smry( - ensemble_paths=self.ens_paths, - time_index="raw", - column_keys=self.column_keys, + self.emodel = EnsembleSetModel( + ensemble_paths={ + ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ens] + for ens in ensembles + } + ) + self.smry = self.emodel.load_smry( + time_index="raw", column_keys=self.column_keys ) self.theme = app.webviz_settings["theme"] self.set_callbacks(app) @@ -337,18 +335,7 @@ def _update_stat_selector(plot_type): ) def add_webvizstore(self): - return [ - ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - 
"column_keys": self.column_keys, - "time_index": "raw", - } - ], - ), - ] + return self.emodel.webvizstore @CACHE.memoize(timeout=CACHE.TIMEOUT) diff --git a/webviz_subsurface/plugins/_parameter_analysis/models/__init__.py b/webviz_subsurface/plugins/_parameter_analysis/models/__init__.py index 0e0635621..722aed2b2 100644 --- a/webviz_subsurface/plugins/_parameter_analysis/models/__init__.py +++ b/webviz_subsurface/plugins/_parameter_analysis/models/__init__.py @@ -1,3 +1,2 @@ -from .ensemble_set_model import EnsembleSetModel from .simulation_timeseries_model import SimulationTimeSeriesModel from .parameters_model import ParametersModel diff --git a/webviz_subsurface/plugins/_parameter_analysis/models/ensemble_set_model.py b/webviz_subsurface/plugins/_parameter_analysis/models/ensemble_set_model.py deleted file mode 100644 index 36d7cef76..000000000 --- a/webviz_subsurface/plugins/_parameter_analysis/models/ensemble_set_model.py +++ /dev/null @@ -1,130 +0,0 @@ -from typing import Union, Optional, List -import pathlib - -import pandas as pd -from fmu.ensemble import ScratchEnsemble, EnsembleSet -from webviz_config.common_cache import CACHE -from webviz_config.webviz_store import webvizstore - - -class EnsembleSetModel: - """Class to load and manipulate ensemble sets from scratch disk - using fmu.ensemble""" - - def __init__( - self, - ensemble_paths: dict, - ensemble_set_name: str = "EnsembleSet", - filter_file: Union[str, None] = "OK", - ) -> None: - self.ensemble_paths = ensemble_paths - self.ensemble_set_name = ensemble_set_name - self.filter_file = filter_file - self.webvizstore: List = [] - - def __repr__(self) -> str: - return f"EnsembleSetModel: {self.ensemble_paths}" - - @property - def ens_folders(self): - """Get root folders for ensemble set""" - return { - ens: pathlib.Path(ens_path.split("realization")[0]) - for ens, ens_path in self.ensemble_paths.items() - } - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - def load_ensemble_set(self) -> EnsembleSet: - 
return EnsembleSet( - self.ensemble_set_name, - [ - ScratchEnsemble(ens_name, ens_path) - if self.filter_file is None - else ScratchEnsemble(ens_name, ens_path).filter(self.filter_file) - for ens_name, ens_path in self.ensemble_paths.items() - ], - ) - - def load_parameters(self): - self.webvizstore.append( - ( - self._load_parameters, - [{"self": self}], - ) - ) - return self._load_parameters() - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_parameters(self) -> pd.DataFrame: - return self.load_ensemble_set().parameters - - def load_smry( - self, - time_index: Optional[Union[list, str]] = None, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - self.webvizstore.append( - ( - self._load_smry, - [{"self": self, "time_index": time_index, "column_keys": column_keys}], - ) - ) - return self._load_smry(time_index=time_index, column_keys=column_keys) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_smry( - self, - time_index: Optional[Union[list, str]] = None, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - - return self.load_ensemble_set().get_smry( - time_index=time_index, column_keys=column_keys - ) - - def load_smry_meta( - self, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - """Finds metadata for the summary vectors in the ensemble set. - Note that we assume the same units for all ensembles. - (meaning that we update/overwrite when checking the next ensemble) - """ - self.webvizstore.append( - (self._load_smry_meta, [{"self": self, "column_keys": column_keys}]) - ) - return self._load_smry_meta(column_keys=column_keys) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_smry_meta( - self, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - """Finds metadata for the summary vectors in the ensemble set. - Note that we assume the same units for all ensembles. 
- (meaning that we update/overwrite when checking the next ensemble) - """ - ensemble_set = self.load_ensemble_set() - smry_meta = {} - for ensname in ensemble_set.ensemblenames: - smry_meta.update( - ensemble_set[ensname].get_smry_meta(column_keys=column_keys) - ) - return pd.DataFrame(smry_meta).transpose() - - def load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: - self.webvizstore.append( - ( - self._load_csv, - [{"self": self, "csv_file": csv_file}], - ) - ) - return self._load_csv(csv_file=csv_file) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: - return self.load_ensemble_set().load_csv(str(csv_file)) diff --git a/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py b/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py index d0e51107c..b7515f0bb 100644 --- a/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py +++ b/webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py @@ -6,8 +6,9 @@ from webviz_config.webviz_assets import WEBVIZ_ASSETS import webviz_subsurface +from webviz_subsurface._models import EnsembleSetModel from .views import main_view -from .models import ParametersModel, SimulationTimeSeriesModel, EnsembleSetModel +from .models import ParametersModel, SimulationTimeSeriesModel from .controllers import ( parameter_qc_controller, parameter_response_controller, @@ -101,8 +102,6 @@ def __init__( ] for ens in ensembles } - if ensembles is not None - else None ) self.pmodel = ParametersModel( dataframe=self.emodel.load_parameters(), diff --git a/webviz_subsurface/plugins/_parameter_parallel_coordinates.py b/webviz_subsurface/plugins/_parameter_parallel_coordinates.py index 4558902ba..6e8724f3a 100644 --- a/webviz_subsurface/plugins/_parameter_parallel_coordinates.py +++ b/webviz_subsurface/plugins/_parameter_parallel_coordinates.py @@ -10,7 +10,7 @@ from webviz_config.common_cache import CACHE from 
webviz_config.webviz_store import webvizstore -from .._datainput.fmu_input import load_parameters, load_csv, load_smry +from webviz_subsurface._models import EnsembleSetModel from .._utils.parameter_response import filter_and_sum_responses @@ -183,25 +183,21 @@ def __init__( 'Incorrect arguments. Either provide "response_csv" or ' '"ensembles and/or response_file".' ) - self.ens_paths = { - ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ens] - for ens in ensembles - } - self.parameterdf = load_parameters( - ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet" + self.emodel = EnsembleSetModel( + ensemble_paths={ + ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ + ens + ] + for ens in ensembles + } ) + self.parameterdf = self.emodel.load_parameters() if not self.no_responses: if self.response_file: - self.responsedf = load_csv( - ensemble_paths=self.ens_paths, - csv_file=response_file, - ensemble_set_name="EnsembleSet", - ) + self.responsedf = self.emodel.load_csv(csv_file=response_file) else: - self.responsedf = load_smry( - ensemble_paths=self.ens_paths, - column_keys=self.column_keys, - time_index=self.time_index, + self.responsedf = self.emodel.load_smry( + time_index=self.time_index, column_keys=self.column_keys ) self.response_filters["DATE"] = "single" else: @@ -605,42 +601,7 @@ def add_webvizstore(self): ) ) else: - functions.append( - ( - load_parameters, - [ - { - "ensemble_paths": self.ens_paths, - } - ], - ), - ) - if not self.no_responses: - if self.response_file: - functions.append( - ( - load_csv, - [ - { - "ensemble_paths": self.ens_paths, - "csv_file": self.response_file, - } - ], - ), - ) - else: - functions.append( - ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - "column_keys": self.column_keys, - "time_index": self.time_index, - } - ], - ), - ) + functions.extend(self.emodel.webvizstore) return functions diff --git a/webviz_subsurface/plugins/_parameter_response_correlation.py 
b/webviz_subsurface/plugins/_parameter_response_correlation.py index fa642bb29..277870936 100644 --- a/webviz_subsurface/plugins/_parameter_response_correlation.py +++ b/webviz_subsurface/plugins/_parameter_response_correlation.py @@ -13,7 +13,8 @@ from webviz_config import WebvizPluginABC from webviz_config.utils import calculate_slider_step -from .._datainput.fmu_input import load_parameters, load_csv, load_smry +from webviz_subsurface._models import EnsembleSetModel +from .._datainput.fmu_input import load_parameters, load_csv class ParameterResponseCorrelation(WebvizPluginABC): @@ -172,8 +173,8 @@ def __init__( ensemble_set_name="EnsembleSet", ) else: - self.responsedf = load_smry( - ensemble_paths=self.ens_paths, + self.emodel = EnsembleSetModel(ensemble_paths=self.ens_paths) + self.responsedf = self.emodel.load_smry( column_keys=self.column_keys, time_index=self.time_index, ) @@ -543,7 +544,8 @@ def add_webvizstore(self): ], ), ] - return [ + + functions = [ ( load_parameters, [ @@ -553,28 +555,23 @@ def add_webvizstore(self): } ], ), - ( - load_csv, - [ - { - "ensemble_paths": self.ens_paths, - "csv_file": self.response_file, - "ensemble_set_name": "EnsembleSet", - } - ], - ) - if self.response_file - else ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - "column_keys": self.column_keys, - "time_index": self.time_index, - } - ], - ), ] + if self.response_file: + functions.append( + ( + load_csv, + [ + { + "ensemble_paths": self.ens_paths, + "csv_file": self.response_file, + "ensemble_set_name": "EnsembleSet", + } + ], + ) + ) + else: + functions.extend(self.emodel.webvizstore) + return functions @CACHE.memoize(timeout=CACHE.TIMEOUT) diff --git a/webviz_subsurface/plugins/_property_statistics/models/__init__.py b/webviz_subsurface/plugins/_property_statistics/models/__init__.py index eaccf1bb4..79d8818cf 100644 --- a/webviz_subsurface/plugins/_property_statistics/models/__init__.py +++ 
b/webviz_subsurface/plugins/_property_statistics/models/__init__.py @@ -1,3 +1,2 @@ -from .ensemble_set_model import EnsembleSetModel from .simulation_timeseries_model import SimulationTimeSeriesModel from .property_statistics_model import PropertyStatisticsModel diff --git a/webviz_subsurface/plugins/_property_statistics/models/ensemble_set_model.py b/webviz_subsurface/plugins/_property_statistics/models/ensemble_set_model.py deleted file mode 100644 index 4eb838b85..000000000 --- a/webviz_subsurface/plugins/_property_statistics/models/ensemble_set_model.py +++ /dev/null @@ -1,121 +0,0 @@ -from typing import Union, Optional, List -import pathlib - -import pandas as pd -from fmu.ensemble import ScratchEnsemble, EnsembleSet -from webviz_config.common_cache import CACHE -from webviz_config.webviz_store import webvizstore - - -class EnsembleSetModel: - """Class to load and manipulate ensemble sets from scratch disk - using fmu.ensemble""" - - def __init__( - self, - ensemble_paths: dict, - ensemble_set_name: str = "EnsembleSet", - filter_file: Union[str, None] = "OK", - ) -> None: - self.ensemble_paths = ensemble_paths - self.ensemble_set_name = ensemble_set_name - self.filter_file = filter_file - self.webvizstore: List = [] - - def __repr__(self) -> str: - return f"EnsembleSetModel: {str(self.ensemble_paths)}" - - @property - def ens_folders(self): - """Get root folders for ensemble set""" - return { - ens: pathlib.Path(ens_path.split("realization")[0]) - for ens, ens_path in self.ensemble_paths.items() - } - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - def load_ensemble_set(self) -> EnsembleSet: - return EnsembleSet( - self.ensemble_set_name, - [ - ScratchEnsemble(ens_name, ens_path) - if self.filter_file is None - else ScratchEnsemble(ens_name, ens_path).filter(self.filter_file) - for ens_name, ens_path in self.ensemble_paths.items() - ], - ) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def load_parameters(self) -> pd.DataFrame: - return 
self.load_ensemble_set().parameters - - def load_smry( - self, - time_index: Optional[Union[list, str]] = None, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - self.webvizstore.append( - ( - self._load_smry, - [{"self": self, "time_index": time_index, "column_keys": column_keys}], - ) - ) - return self._load_smry(time_index=time_index, column_keys=column_keys) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_smry( - self, - time_index: Optional[Union[list, str]] = None, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - - return self.load_ensemble_set().get_smry( - time_index=time_index, column_keys=column_keys - ) - - def load_smry_meta( - self, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - """Finds metadata for the summary vectors in the ensemble set. - Note that we assume the same units for all ensembles. - (meaning that we update/overwrite when checking the next ensemble) - """ - self.webvizstore.append( - (self._load_smry_meta, [{"self": self, "column_keys": column_keys}]) - ) - return self._load_smry_meta(column_keys=column_keys) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_smry_meta( - self, - column_keys: Optional[list] = None, - ) -> pd.DataFrame: - """Finds metadata for the summary vectors in the ensemble set. - Note that we assume the same units for all ensembles. 
- (meaning that we update/overwrite when checking the next ensemble) - """ - ensemble_set = self.load_ensemble_set() - smry_meta = {} - for ensname in ensemble_set.ensemblenames: - smry_meta.update( - ensemble_set[ensname].get_smry_meta(column_keys=column_keys) - ) - return pd.DataFrame(smry_meta).transpose() - - def load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: - self.webvizstore.append( - ( - self._load_csv, - [{"self": self, "csv_file": csv_file}], - ) - ) - return self._load_csv(csv_file=csv_file) - - @CACHE.memoize(timeout=CACHE.TIMEOUT) - @webvizstore - def _load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame: - return self.load_ensemble_set().load_csv(str(csv_file)) diff --git a/webviz_subsurface/plugins/_property_statistics/property_statistics.py b/webviz_subsurface/plugins/_property_statistics/property_statistics.py index db0a65c0f..1395eb5ed 100644 --- a/webviz_subsurface/plugins/_property_statistics/property_statistics.py +++ b/webviz_subsurface/plugins/_property_statistics/property_statistics.py @@ -6,8 +6,9 @@ from webviz_config.webviz_assets import WEBVIZ_ASSETS import webviz_subsurface +from webviz_subsurface._models import EnsembleSetModel from .views import main_view -from .models import PropertyStatisticsModel, SimulationTimeSeriesModel, EnsembleSetModel +from .models import PropertyStatisticsModel, SimulationTimeSeriesModel from .controllers import ( property_qc_controller, property_delta_controller, @@ -112,8 +113,6 @@ def __init__( ] for ens in ensembles } - if ensembles is not None - else None ) self.pmodel = PropertyStatisticsModel( dataframe=self.emodel.load_csv(csv_file=self.statistics_file), @@ -121,7 +120,7 @@ def __init__( ) self.vmodel = SimulationTimeSeriesModel( dataframe=self.emodel.load_smry( - time_index=self.time_index, + time_index=self.time_index, column_keys=self.column_keys ), theme=self.theme, ) diff --git a/webviz_subsurface/plugins/_reservoir_simulation_timeseries.py 
b/webviz_subsurface/plugins/_reservoir_simulation_timeseries.py index d477d1864..94ba12b79 100644 --- a/webviz_subsurface/plugins/_reservoir_simulation_timeseries.py +++ b/webviz_subsurface/plugins/_reservoir_simulation_timeseries.py @@ -14,7 +14,7 @@ from webviz_config.webviz_store import webvizstore from webviz_config.common_cache import CACHE -from .._datainput.fmu_input import load_smry, load_smry_meta +from webviz_subsurface._models import EnsembleSetModel from .._abbreviations.reservoir_simulation import ( simulation_vector_description, simulation_unit_reformat, @@ -160,21 +160,19 @@ def __init__( sorted(pd.to_datetime(self.smry["DATE"]).unique()) ) elif ensembles: - self.ens_paths = { - ensemble: app.webviz_settings["shared_settings"]["scratch_ensembles"][ - ensemble - ] - for ensemble in ensembles - } - self.smry = load_smry( - ensemble_paths=self.ens_paths, - ensemble_set_name="EnsembleSet", - time_index=self.time_index, - column_keys=self.column_keys, + self.emodel = EnsembleSetModel( + ensemble_paths={ + ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ + ens + ] + for ens in ensembles + } ) - self.smry_meta = load_smry_meta( - ensemble_paths=self.ens_paths, - ensemble_set_name="EnsembleSet", + self.smry = self.emodel.load_smry( + time_index=self.time_index, column_keys=self.column_keys + ) + + self.smry_meta = self.emodel.load_smry_meta( column_keys=self.column_keys, ) else: @@ -923,31 +921,7 @@ def add_webvizstore(self): if self.csvfile: functions.append((read_csv, [{"csv_file": self.csvfile}])) else: - functions.append( - ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - "ensemble_set_name": "EnsembleSet", - "time_index": self.time_index, - "column_keys": self.column_keys, - } - ], - ) - ) - functions.append( - ( - load_smry_meta, - [ - { - "ensemble_paths": self.ens_paths, - "ensemble_set_name": "EnsembleSet", - "column_keys": self.column_keys, - } - ], - ) - ) + functions.extend(self.emodel.webvizstore) if self.obsfile: 
functions.append((get_path, [{"path": self.obsfile}])) return functions diff --git a/webviz_subsurface/plugins/_reservoir_simulation_timeseries_onebyone.py b/webviz_subsurface/plugins/_reservoir_simulation_timeseries_onebyone.py index d98bfed8e..57948b8f6 100644 --- a/webviz_subsurface/plugins/_reservoir_simulation_timeseries_onebyone.py +++ b/webviz_subsurface/plugins/_reservoir_simulation_timeseries_onebyone.py @@ -15,12 +15,11 @@ from webviz_config.common_cache import CACHE from webviz_config.webviz_store import webvizstore +from webviz_subsurface._models import EnsembleSetModel from .._private_plugins.tornado_plot import TornadoPlot from .._datainput.fmu_input import ( - load_smry, get_realizations, find_sens_type, - load_smry_meta, ) from .._abbreviations.reservoir_simulation import ( simulation_vector_description, @@ -155,27 +154,23 @@ def __init__( ] for ensemble in ensembles } - # Extract realizations and sensitivity information - parameters = get_realizations( - ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet" + self.emodel = EnsembleSetModel(ensemble_paths=self.ens_paths) + smry = self.emodel.load_smry( + time_index=self.time_index, column_keys=self.column_keys ) - smry = load_smry( - ensemble_paths=self.ens_paths, - ensemble_set_name="EnsembleSet", - time_index=self.time_index, + + self.smry_meta = self.emodel.load_smry_meta( column_keys=self.column_keys, ) - self.smry_meta = load_smry_meta( - ensemble_paths=self.ens_paths, - ensemble_set_name="EnsembleSet", - column_keys=self.column_keys, + # Extract realizations and sensitivity information + parameters = get_realizations( + ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet" ) else: raise ValueError( 'Incorrent arguments. 
Either provide a "csvfile_smry" and "csvfile_parameters" or ' '"ensembles"' ) - self.data = pd.merge(smry, parameters, on=["ENSEMBLE", "REAL"]) self.smry_cols = [ c @@ -295,28 +290,8 @@ def add_webvizstore(self): ) ] if self.csvfile_smry and self.csvfile_parameters - else [ - ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - "ensemble_set_name": "EnsembleSet", - "time_index": self.time_index, - "column_keys": self.column_keys, - } - ], - ), - ( - load_smry_meta, - [ - { - "ensemble_paths": self.ens_paths, - "ensemble_set_name": "EnsembleSet", - "column_keys": self.column_keys, - } - ], - ), + else self.emodel.webvizstore + + [ ( get_realizations, [ @@ -325,7 +300,7 @@ def add_webvizstore(self): "ensemble_set_name": "EnsembleSet", } ], - ), + ) ] ) diff --git a/webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py b/webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py index af2ef1faa..8b4868fee 100644 --- a/webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py +++ b/webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py @@ -20,7 +20,7 @@ from webviz_config.webviz_store import webvizstore from webviz_config import WebvizPluginABC -from .._datainput.fmu_input import load_smry, load_smry_meta +from webviz_subsurface._models import EnsembleSetModel from .._abbreviations.reservoir_simulation import ( simulation_vector_base, simulation_vector_description, @@ -133,19 +133,18 @@ def __init__( "'monthly' or 'yearly', as the statistics require the same dates throughout an" "ensemble." 
) - self.ens_paths = { - ensemble: app.webviz_settings["shared_settings"]["scratch_ensembles"][ - ensemble - ] - for ensemble in ensembles - } - self.smry = load_smry( - ensemble_paths=self.ens_paths, - column_keys=self.column_keys, - time_index=self.time_index, + self.emodel = EnsembleSetModel( + ensemble_paths={ + ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ens] + for ens in ensembles + } ) - self.smry_meta = load_smry_meta( - ensemble_paths=self.ens_paths, column_keys=self.column_keys + self.smry = self.emodel.load_smry( + time_index=self.time_index, column_keys=self.column_keys + ) + + self.smry_meta = self.emodel.load_smry_meta( + column_keys=self.column_keys, ) self.field_totals = [ col for col in self.smry.columns if fnmatch.fnmatch(col, "F[OWG]PT") @@ -160,10 +159,15 @@ def __init__( ] ) else: - self.smry_init_prod = load_smry( - ensemble_paths=self.ens_paths, + total_smry = self.emodel.load_smry( + time_index=self.time_index, column_keys=["F[OWG]PT"], - time_index="first", + ) + self.smry_init_prod = pd.concat( + [ + df[df["DATE"] == min(df["DATE"])] + for _, df in total_smry.groupby("ENSEMBLE") + ] ) self.rec_ensembles = set(self.smry["ENSEMBLE"].unique()) for col in self.smry_init_prod.columns: @@ -338,40 +342,7 @@ def groupby_colors(self): return color_dict def add_webvizstore(self): - functions = [ - ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - "column_keys": self.column_keys, - "time_index": self.time_index, - } - ], - ), - ( - load_smry_meta, - [ - { - "ensemble_paths": self.ens_paths, - "column_keys": self.column_keys, - } - ], - ), - ] - if not self.field_totals: - functions.append( - ( - load_smry, - [ - { - "ensemble_paths": self.ens_paths, - "column_keys": ["F[OWG]PT"], - "time_index": "first", - } - ], - ) - ) + functions = self.emodel.webvizstore if self.fipfile is not None: functions.append( (