From 5dac1edd8fab54883f9b86a1b63b6048d6c890ef Mon Sep 17 00:00:00 2001 From: Sigurd Pettersen Date: Mon, 28 Mar 2022 09:29:06 +0200 Subject: [PATCH] Avoid copying surfaces when using `MapViewerFMU` in non-portable mode (#986) --- CHANGELOG.md | 1 + .../_provider_impl_file.py | 105 +++++++++++++----- .../dev_experiments.py | 2 +- .../ensemble_surface_provider_factory.py | 18 ++- 4 files changed, 94 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0605c18a7..f1d71b3c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - [#988](https://github.com/equinor/webviz-subsurface/pull/988) - `ParameterAnalysis` and `PropertyStatistics`- Changed to reading unsmry data from `.arrow` files and deprecated the usage of aggregated csvfiles as input. +- [#986](https://github.com/equinor/webviz-subsurface/pull/986) - Avoid copying surfaces when using `MapViewerFMU` in non-portable mode. ## [0.2.11] - 2022-03-14 diff --git a/webviz_subsurface/_providers/ensemble_surface_provider/_provider_impl_file.py b/webviz_subsurface/_providers/ensemble_surface_provider/_provider_impl_file.py index c9dbc904f..882c30923 100644 --- a/webviz_subsurface/_providers/ensemble_surface_provider/_provider_impl_file.py +++ b/webviz_subsurface/_providers/ensemble_surface_provider/_provider_impl_file.py @@ -61,10 +61,18 @@ def write_backing_store( storage_key: str, sim_surfaces: List[SurfaceFileInfo], obs_surfaces: List[SurfaceFileInfo], + avoid_copying_surfaces: bool, ) -> None: + """If avoid_copying_surfaces is True, the specified surfaces will NOT be copied + into the backing store, but will be referenced from their source locations. + Note that this is only useful when running in non-portable mode and will fail + in portable mode. 
+ """ timer = PerfTimer() + do_copy_surfs_into_store = not avoid_copying_surfaces + # All data for this provider will be stored inside a sub-directory # given by the storage key provider_dir = storage_dir / storage_key @@ -82,44 +90,55 @@ def write_backing_store( original_path_arr: List[str] = [] for surfinfo in sim_surfaces: - rel_path_in_store = _compose_rel_sim_surf_path( - real=surfinfo.real, - attribute=surfinfo.attribute, - name=surfinfo.name, - datestr=surfinfo.datestr, - extension=Path(surfinfo.path).suffix, - ) type_arr.append(SurfaceType.SIMULATED) real_arr.append(surfinfo.real) attribute_arr.append(surfinfo.attribute) name_arr.append(surfinfo.name) datestr_arr.append(surfinfo.datestr if surfinfo.datestr else "") - rel_path_arr.append(str(rel_path_in_store)) original_path_arr.append(surfinfo.path) + rel_path_in_store = "" + if do_copy_surfs_into_store: + rel_path_in_store = _compose_rel_sim_surf_pathstr( + real=surfinfo.real, + attribute=surfinfo.attribute, + name=surfinfo.name, + datestr=surfinfo.datestr, + extension=Path(surfinfo.path).suffix, + ) + rel_path_arr.append(rel_path_in_store) + # We want to strip out observed surfaces without a matching simulated surface valid_obs_surfaces = _find_observed_surfaces_corresponding_to_simulated( obs_surfaces=obs_surfaces, sim_surfaces=sim_surfaces ) for surfinfo in valid_obs_surfaces: - rel_path_in_store = _compose_rel_obs_surf_path( - attribute=surfinfo.attribute, - name=surfinfo.name, - datestr=surfinfo.datestr, - extension=Path(surfinfo.path).suffix, - ) type_arr.append(SurfaceType.OBSERVED) real_arr.append(-1) attribute_arr.append(surfinfo.attribute) name_arr.append(surfinfo.name) datestr_arr.append(surfinfo.datestr if surfinfo.datestr else "") - rel_path_arr.append(str(rel_path_in_store)) original_path_arr.append(surfinfo.path) - LOGGER.debug(f"Copying {len(original_path_arr)} surfaces into backing store...") + rel_path_in_store = "" + if do_copy_surfs_into_store: + rel_path_in_store = 
_compose_rel_obs_surf_pathstr( + attribute=surfinfo.attribute, + name=surfinfo.name, + datestr=surfinfo.datestr, + extension=Path(surfinfo.path).suffix, + ) + rel_path_arr.append(rel_path_in_store) + timer.lap_s() - _copy_surfaces_into_provider_dir(original_path_arr, rel_path_arr, provider_dir) + if do_copy_surfs_into_store: + LOGGER.debug( + f"Copying {len(original_path_arr)} surfaces into backing store..." + ) + _copy_surfaces_into_provider_dir( + original_path_arr, rel_path_arr, provider_dir + ) et_copy_s = timer.lap_s() surface_inventory_df = pd.DataFrame( @@ -137,10 +156,16 @@ def write_backing_store( parquet_file_name = provider_dir / "surface_inventory.parquet" surface_inventory_df.to_parquet(path=parquet_file_name) - LOGGER.debug( - f"Wrote surface backing store in: {timer.elapsed_s():.2f}s (" - f"copy={et_copy_s:.2f}s)" - ) + if do_copy_surfs_into_store: + LOGGER.debug( + f"Wrote surface backing store in: {timer.elapsed_s():.2f}s (" + f"copy={et_copy_s:.2f}s)" + ) + else: + LOGGER.debug( + f"Wrote surface backing store without copying surfaces in: " + f"{timer.elapsed_s():.2f}s" + ) @staticmethod def from_backing_store( @@ -365,7 +390,18 @@ def _locate_simulated_surfaces( & (df[Col.REAL].isin(realizations)) ] - return [self._provider_dir / rel_path for rel_path in df[Col.REL_PATH]] + df = df[[Col.REL_PATH, Col.ORIGINAL_PATH]] + + # Return file name within backing store if the surface was copied there, + # otherwise return the original source file name + fn_list: List[str] = [] + for _index, row in df.iterrows(): + if row[Col.REL_PATH]: + fn_list.append(self._provider_dir / row[Col.REL_PATH]) + else: + fn_list.append(row[Col.ORIGINAL_PATH]) + + return fn_list def _locate_observed_surfaces( self, attribute: str, name: str, datestr: str @@ -381,7 +417,18 @@ def _locate_observed_surfaces( & (df[Col.DATESTR] == datestr) ] - return [self._provider_dir / rel_path for rel_path in df[Col.REL_PATH]] + df = df[[Col.REL_PATH, Col.ORIGINAL_PATH]] + + # Return file name 
within backing store if the surface was copied there, + # otherwise return the original source file name + fn_list: List[str] = [] + for _index, row in df.iterrows(): + if row[Col.REL_PATH]: + fn_list.append(self._provider_dir / row[Col.REL_PATH]) + else: + fn_list.append(row[Col.ORIGINAL_PATH]) + + return fn_list def _find_observed_surfaces_corresponding_to_simulated( @@ -421,33 +468,33 @@ def _copy_surfaces_into_provider_dir( # executor.map(shutil.copyfile, original_path_arr, full_dst_path_arr) -def _compose_rel_sim_surf_path( +def _compose_rel_sim_surf_pathstr( real: int, attribute: str, name: str, datestr: Optional[str], extension: str, -) -> Path: +) -> str: """Compose path to simulated surface file, relative to provider's directory""" if datestr: fname = f"{real}--{name}--{attribute}--{datestr}{extension}" else: fname = f"{real}--{name}--{attribute}{extension}" - return Path(REL_SIM_DIR) / fname + return str(Path(REL_SIM_DIR) / fname) -def _compose_rel_obs_surf_path( +def _compose_rel_obs_surf_pathstr( attribute: str, name: str, datestr: Optional[str], extension: str, -) -> Path: +) -> str: """Compose path to observed surface file, relative to provider's directory""" if datestr: fname = f"{name}--{attribute}--{datestr}{extension}" else: fname = f"{name}--{attribute}{extension}" - return Path(REL_OBS_DIR) / fname + return str(Path(REL_OBS_DIR) / fname) def _calc_statistic_across_surfaces( diff --git a/webviz_subsurface/_providers/ensemble_surface_provider/dev_experiments.py b/webviz_subsurface/_providers/ensemble_surface_provider/dev_experiments.py index 863c6b372..8925eea3a 100644 --- a/webviz_subsurface/_providers/ensemble_surface_provider/dev_experiments.py +++ b/webviz_subsurface/_providers/ensemble_surface_provider/dev_experiments.py @@ -35,7 +35,7 @@ def main() -> None: # factory = EnsembleSurfaceProviderFactory.instance() factory = EnsembleSurfaceProviderFactory( - root_storage_dir, allow_storage_writes=True + root_storage_dir, 
allow_storage_writes=True, avoid_copying_surfaces=False ) provider: EnsembleSurfaceProvider = factory.create_from_ensemble_surface_files( diff --git a/webviz_subsurface/_providers/ensemble_surface_provider/ensemble_surface_provider_factory.py b/webviz_subsurface/_providers/ensemble_surface_provider/ensemble_surface_provider_factory.py index e18d2ebbf..f7ee073c4 100644 --- a/webviz_subsurface/_providers/ensemble_surface_provider/ensemble_surface_provider_factory.py +++ b/webviz_subsurface/_providers/ensemble_surface_provider/ensemble_surface_provider_factory.py @@ -21,9 +21,15 @@ class EnsembleSurfaceProviderFactory(WebvizFactory): - def __init__(self, root_storage_folder: Path, allow_storage_writes: bool) -> None: + def __init__( + self, + root_storage_folder: Path, + allow_storage_writes: bool, + avoid_copying_surfaces: bool, + ) -> None: self._storage_dir = Path(root_storage_folder) / __name__ self._allow_storage_writes = allow_storage_writes + self._avoid_copying_surfaces = avoid_copying_surfaces LOGGER.info( f"EnsembleSurfaceProviderFactory init: storage_dir={self._storage_dir}" @@ -41,8 +47,13 @@ def instance() -> "EnsembleSurfaceProviderFactory": app_instance_info = WEBVIZ_FACTORY_REGISTRY.app_instance_info storage_folder = app_instance_info.storage_folder allow_writes = app_instance_info.run_mode != WebvizRunMode.PORTABLE + dont_copy_surfs = app_instance_info.run_mode == WebvizRunMode.NON_PORTABLE - factory = EnsembleSurfaceProviderFactory(storage_folder, allow_writes) + factory = EnsembleSurfaceProviderFactory( + root_storage_folder=storage_folder, + allow_storage_writes=allow_writes, + avoid_copying_surfaces=dont_copy_surfs, + ) # Store the factory object in the global factory registry WEBVIZ_FACTORY_REGISTRY.set_factory(EnsembleSurfaceProviderFactory, factory) @@ -80,11 +91,14 @@ def create_from_ensemble_surface_files( obs_surface_files = discover_observed_surface_files(ens_path, attribute_filter) et_discover_s = timer.lap_s() + # As an optimization, 
avoid copying the surfaces into the backing store, + # typically when we're running in non-portable mode ProviderImplFile.write_backing_store( self._storage_dir, storage_key, sim_surfaces=sim_surface_files, obs_surfaces=obs_surface_files, + avoid_copying_surfaces=self._avoid_copying_surfaces, ) et_write_s = timer.lap_s()