Add backwards compatibility for older portables using `ParameterAnalysis` and `PropertyStatistic` (#995)

* add backwards compatibility for older portables

* get run_mode from WEBVIZ_INSTANCE_INFO
tnatt authored Mar 30, 2022
1 parent c72a9e2 commit e25e8cb
Showing 6 changed files with 163 additions and 81 deletions.
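
The two commit-message bullets above describe the one pattern this diff applies in both parameter_analysis.py and property_statistics.py: read the run mode from WEBVIZ_INSTANCE_INFO, try the new provider-based data loading first, and fall back to the csv files stored in an older portable build only when that loading fails and the app is running in portable mode. A condensed sketch of the pattern, with hypothetical helper names (load_with_portable_fallback and its two callables are illustrative, not names from the repository):

from pathlib import Path
from typing import Callable

import pandas as pd
from webviz_config.webviz_instance_info import WEBVIZ_INSTANCE_INFO, WebvizRunMode


def load_with_portable_fallback(
    load_from_provider: Callable[[Path], pd.DataFrame],
    read_stored_csv: Callable[[Path], pd.DataFrame],
    csv_file: Path,
) -> pd.DataFrame:
    # Prefer the provider-based loader; if its backing files are missing and we
    # are serving an older portable app, fall back to the csv that was stored
    # with @webvizstore when the portable was built.
    run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE
    try:
        return load_from_provider(csv_file)
    except FileNotFoundError:
        if not run_mode_portable:
            raise  # non-portable runs should still fail loudly
        return read_stored_csv(csv_file)

In the actual diff, the ensembles code path catches ValueError from the arrow providers and the csv code path catches FileNotFoundError from the table providers; the sketch collapses both into a single branch.
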
@@ -0,0 +1,11 @@
import pathlib

import pandas as pd
from webviz_config.common_cache import CACHE
from webviz_config.webviz_store import webvizstore


@CACHE.memoize(timeout=CACHE.TIMEOUT)
@webvizstore
def read_csv(csv_file: pathlib.Path) -> pd.DataFrame:
return pd.read_csv(csv_file)
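
The block above is the new data_loaders module; an identical copy is added for the property_statistics plugin further down. @CACHE.memoize avoids re-reading the csv on every callback, while @webvizstore is what allows a portable build to serve the csv contents from the webviz store instead of the file system. A minimal usage sketch follows; the plugin class, its attributes and the path are illustrative, and the add_webvizstore registration is the general webviz-config convention rather than something shown in this diff:

import pathlib

from webviz_config import WebvizPluginABC


class ExamplePortablePlugin(WebvizPluginABC):
    # Hypothetical plugin, shown only to illustrate how read_csv (defined above)
    # is typically wired up.
    def __init__(self, csvfile_parameters: pathlib.Path):
        super().__init__()
        self.csvfile_parameters = csvfile_parameters
        # Live run: reads the file from disk. Portable run: the stored result is
        # returned instead, because read_csv is decorated with @webvizstore.
        self.parameter_df = read_csv(csv_file=self.csvfile_parameters)

    @property
    def add_webvizstore(self):
        # Registers the call so its return value is computed and stored when the
        # portable application is built.
        return [(read_csv, [{"csv_file": self.csvfile_parameters}])]
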
@@ -27,6 +27,7 @@ def __init__(
if column not in dataframe.columns:
raise KeyError(f"{column} column is missing from UNSMRY data")

dataframe = dataframe.copy()
# ensure correct format of date
dataframe["REAL"] = dataframe["REAL"].astype(int)
dataframe["DATE"] = pd.to_datetime(dataframe["DATE"])
109 changes: 71 additions & 38 deletions webviz_subsurface/plugins/_parameter_analysis/parameter_analysis.py
@@ -1,18 +1,25 @@
from pathlib import Path
from typing import Optional, Tuple
from typing import Optional, Tuple, Union

import pandas as pd
from dash import dcc
from webviz_config import WebvizPluginABC, WebvizSettings
from webviz_config.deprecation_decorators import deprecated_plugin_arguments
from webviz_config.webviz_instance_info import WEBVIZ_INSTANCE_INFO, WebvizRunMode

from webviz_subsurface._models import (
EnsembleSetModel,
caching_ensemble_set_model_factory,
)
from webviz_subsurface._providers import (
EnsembleSummaryProviderFactory,
EnsembleTableProviderFactory,
EnsembleTableProviderSet,
Frequency,
)

from .controllers import parameter_qc_controller, parameter_response_controller
from .data_loaders import read_csv
from .models import (
ParametersModel,
ProviderTimeSeriesDataModel,
@@ -81,14 +88,18 @@ def __init__(
column_keys: Optional[list] = None,
drop_constants: bool = True,
rel_file_pattern: str = "share/results/unsmry/*.arrow",
csvfile_parameters: Optional[Path] = None,
csvfile_smry: Optional[Path] = None,
csvfile_parameters: Path = None,
csvfile_smry: Path = None,
):
super().__init__()

self.theme = webviz_settings.theme
self.ensembles = ensembles
self.vmodel: Optional[
Union[SimulationTimeSeriesModel, ProviderTimeSeriesDataModel]
] = None

run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE
table_provider = EnsembleTableProviderFactory.instance()

if ensembles is not None:
@@ -102,55 +113,77 @@ def __init__(
resampling_frequency = Frequency(time_index)
provider_factory = EnsembleSummaryProviderFactory.instance()

provider_set = {}
for ens, ens_path in ensemble_paths.items():
try:
provider_set[
ens
] = provider_factory.create_from_arrow_unsmry_presampled(
try:
provider_set = {
ens: provider_factory.create_from_arrow_unsmry_presampled(
str(ens_path), rel_file_pattern, resampling_frequency
)
except ValueError as error:
message = (
f"No arrow files found at {rel_file_pattern} for ensemble {ens}. \n"
"If no arrow files have been generated with `ERT` using `ECL2CSV`, "
"the commandline tool `smry2arrow_batch` can be used to generate arrow "
"files for an ensemble"
for ens, ens_path in ensemble_paths.items()
}
self.vmodel = ProviderTimeSeriesDataModel(
provider_set=provider_set, column_keys=column_keys
)
parameter_df = create_df_from_table_provider(
table_provider.create_provider_set_from_per_realization_parameter_file(
ensemble_paths
)
)
except ValueError as error:
message = (
f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n"
"If no arrow files have been generated with `ERT` using `ECL2CSV`, "
"the commandline tool `smry2arrow_batch` can be used to generate arrow "
"files for an ensemble"
)
if not run_mode_portable:
raise ValueError(message) from error

self.vmodel = ProviderTimeSeriesDataModel(
provider_set=provider_set, column_keys=column_keys
)

parameterproviderset = (
table_provider.create_provider_set_from_per_realization_parameter_file(
ensemble_paths
# NOTE: this part below is to ensure backwards compatibility for portable app's
# created before the arrow support. It should be removed in the future.
emodel: EnsembleSetModel = (
caching_ensemble_set_model_factory.get_or_create_model(
ensemble_paths=ensemble_paths,
time_index=time_index,
column_keys=column_keys,
)
)
)
self.vmodel = SimulationTimeSeriesModel(
dataframe=emodel.get_or_load_smry_cached()
)
parameter_df = emodel.load_parameters()

elif csvfile_parameters is None:
raise ValueError("Either ensembles or csvfile_parameters must be specified")
else:
parameterproviderset = (
table_provider.create_provider_set_from_aggregated_csv_file(
csvfile_parameters
)
)
if csvfile_smry is not None:
smryprovider = (
# NOTE: the try/except is for backwards compatibility with existing portable app's.
# It should be removed in the future together with the support of aggregated csv-files
try:
parameter_df = create_df_from_table_provider(
table_provider.create_provider_set_from_aggregated_csv_file(
csvfile_smry
csvfile_parameters
)
)
self.vmodel = SimulationTimeSeriesModel(
dataframe=create_df_from_table_provider(smryprovider)
)
else:
self.vmodel = None
except FileNotFoundError:
if not run_mode_portable:
raise
parameter_df = read_csv(csvfile_parameters)

if csvfile_smry is not None:
try:
smry_df = create_df_from_table_provider(
table_provider.create_provider_set_from_aggregated_csv_file(
csvfile_smry
)
)
except FileNotFoundError:
if not run_mode_portable:
raise
smry_df = read_csv(csvfile_smry)

self.vmodel = SimulationTimeSeriesModel(dataframe=smry_df)

self.pmodel = ParametersModel(
dataframe=create_df_from_table_provider(parameterproviderset),
dataframe=parameter_df,
theme=self.theme,
drop_constants=drop_constants,
)
@@ -177,7 +210,7 @@ def set_callbacks(self, app) -> None:
)


def create_df_from_table_provider(provider) -> pd.DataFrame:
def create_df_from_table_provider(provider: EnsembleTableProviderSet) -> pd.DataFrame:
dfs = []
for ens in provider.ensemble_names():
df = provider.ensemble_provider(ens).get_column_data(
@@ -0,0 +1,11 @@
import pathlib

import pandas as pd
from webviz_config.common_cache import CACHE
from webviz_config.webviz_store import webvizstore


@CACHE.memoize(timeout=CACHE.TIMEOUT)
@webvizstore
def read_csv(csv_file: pathlib.Path) -> pd.DataFrame:
return pd.read_csv(csv_file)
@@ -24,6 +24,7 @@ def __init__(
if column not in dataframe.columns:
raise KeyError(f"{column} column is missing from UNSMRY data")

dataframe = dataframe.copy()
# ensure correct format of date
dataframe["REAL"] = dataframe["REAL"].astype(int)
dataframe["DATE"] = pd.to_datetime(dataframe["DATE"])
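
The hunk above, together with the matching hunk earlier in the diff, adds dataframe = dataframe.copy() before the in-place dtype casts, so the time-series model no longer mutates the DataFrame handed in by the caller. A small self-contained illustration with toy data and a hypothetical function name:

import pandas as pd

raw = pd.DataFrame({"REAL": ["0", "1"], "DATE": ["2020-01-01", "2020-02-01"]})


def normalise(dataframe: pd.DataFrame) -> pd.DataFrame:
    dataframe = dataframe.copy()  # without this line the caller's frame is mutated
    dataframe["REAL"] = dataframe["REAL"].astype(int)
    dataframe["DATE"] = pd.to_datetime(dataframe["DATE"])
    return dataframe


normalised = normalise(raw)
assert raw["REAL"].dtype == object  # the original frame is left untouched
assert normalised["DATE"].dtype == "datetime64[ns]"
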
111 changes: 68 additions & 43 deletions webviz_subsurface/plugins/_property_statistics/property_statistics.py
@@ -5,19 +5,23 @@
from dash import Dash, dcc
from webviz_config import WebvizConfigTheme, WebvizPluginABC, WebvizSettings
from webviz_config.deprecation_decorators import deprecated_plugin_arguments
from webviz_config.webviz_instance_info import WEBVIZ_INSTANCE_INFO, WebvizRunMode

from webviz_subsurface._models import (
EnsembleSetModel,
caching_ensemble_set_model_factory,
)
from webviz_subsurface._providers import (
EnsembleSummaryProviderFactory,
EnsembleTableProviderFactory,
Frequency,
)
from webviz_subsurface._providers.ensemble_table_provider import (
EnsembleTableProviderSet,
Frequency,
)

from .controllers.property_delta_controller import property_delta_controller
from .controllers.property_qc_controller import property_qc_controller
from .controllers.property_response_controller import property_response_controller
from .data_loaders import read_csv
from .models import (
PropertyStatisticsModel,
ProviderTimeSeriesDataModel,
@@ -93,17 +97,17 @@ def __init__(
surface_renaming: Optional[dict] = None,
time_index: str = "monthly",
column_keys: Optional[list] = None,
csvfile_statistics: Optional[Path] = None,
csvfile_smry: Optional[Path] = None,
csvfile_statistics: Path = None,
csvfile_smry: Path = None,
):
super().__init__()
self.theme: WebvizConfigTheme = webviz_settings.theme
self.ensembles = ensembles
self._surface_folders: Union[dict, None]
self._surface_folders: Union[dict, None] = None
self._vmodel: Optional[
Union[SimulationTimeSeriesModel, ProviderTimeSeriesDataModel]
] = None

run_mode_portable = WEBVIZ_INSTANCE_INFO.run_mode == WebvizRunMode.PORTABLE
table_provider = EnsembleTableProviderFactory.instance()

if ensembles is not None:
@@ -117,32 +121,45 @@ def __init__(
resampling_frequency = Frequency(time_index)
provider_factory = EnsembleSummaryProviderFactory.instance()

provider_set = {}
for ens, ens_path in ensemble_paths.items():
try:
provider_set[
ens
] = provider_factory.create_from_arrow_unsmry_presampled(
try:
provider_set = {
ens: provider_factory.create_from_arrow_unsmry_presampled(
str(ens_path), rel_file_pattern, resampling_frequency
)
except ValueError as error:
message = (
f"No arrow files found at {rel_file_pattern} for ensemble {ens}. \n"
"If no arrow files have been generated with `ERT` using `ECL2CSV`, "
"the commandline tool `smry2arrow_batch` can be used to generate arrow "
"files for an ensemble"
for ens, ens_path in ensemble_paths.items()
}
self._vmodel = ProviderTimeSeriesDataModel(
provider_set=provider_set, column_keys=column_keys
)
property_df = create_df_from_table_provider(
table_provider.create_provider_set_from_per_realization_csv_file(
ensemble_paths, statistics_file
)
)
except ValueError as error:
message = (
f"Some/all ensembles are missing arrow files at {rel_file_pattern}.\n"
"If no arrow files have been generated with `ERT` using `ECL2CSV`, "
"the commandline tool `smry2arrow_batch` can be used to generate arrow "
"files for an ensemble"
)
if not run_mode_portable:
raise ValueError(message) from error

self._vmodel = ProviderTimeSeriesDataModel(
provider_set=provider_set, column_keys=column_keys
)

propertyproviderset = (
table_provider.create_provider_set_from_per_realization_csv_file(
ensemble_paths, statistics_file
# NOTE: this part below is to ensure backwards compatibility for portable app's
# created before the arrow support. It should be removed in the future.
emodel: EnsembleSetModel = (
caching_ensemble_set_model_factory.get_or_create_model(
ensemble_paths=ensemble_paths,
time_index=time_index,
column_keys=column_keys,
)
)
)
self._vmodel = SimulationTimeSeriesModel(
dataframe=emodel.get_or_load_smry_cached()
)
property_df = emodel.load_csv(csv_file=Path(statistics_file))

self._surface_folders = {
ens: Path(ens_path.split("realization")[0]) / "share/results/maps" / ens
for ens, ens_path in ensemble_paths.items()
@@ -153,26 +170,34 @@ def __init__(
raise ValueError(
"If not 'ensembles', then csvfile_statistics must be provided"
)
propertyproviderset = (
table_provider.create_provider_set_from_aggregated_csv_file(
csvfile_statistics
)
)
if csvfile_smry is not None:
smryprovider = (
# NOTE: the try/except is for backwards compatibility with existing portable app's.
# It should be removed in the future together with the support of aggregated csv-files
try:
property_df = create_df_from_table_provider(
table_provider.create_provider_set_from_aggregated_csv_file(
csvfile_smry
csvfile_statistics
)
)
self._vmodel = SimulationTimeSeriesModel(
dataframe=create_df_from_table_provider(smryprovider)
)
self._surface_folders = None
except FileNotFoundError:
if not run_mode_portable:
raise
property_df = read_csv(csvfile_statistics)

self._pmodel = PropertyStatisticsModel(
dataframe=create_df_from_table_provider(propertyproviderset),
theme=self.theme,
)
if csvfile_smry is not None:
try:
smry_df = create_df_from_table_provider(
table_provider.create_provider_set_from_aggregated_csv_file(
csvfile_smry
)
)
except FileNotFoundError:
if not run_mode_portable:
raise
smry_df = read_csv(csvfile_smry)

self._vmodel = SimulationTimeSeriesModel(dataframe=smry_df)

self._pmodel = PropertyStatisticsModel(dataframe=property_df, theme=self.theme)

self._surface_renaming = surface_renaming if surface_renaming else {}
self._surface_table = generate_surface_table(
