Skip to content

Commit

Permalink
Avoid copying surfaces when using MapViewerFMU in non-portable mode (#986)
Browse files Browse the repository at this point in the history
  • Loading branch information
sigurdp authored Mar 28, 2022
1 parent b5ef68c commit 5dac1ed
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 32 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed

- [#988](https://github.com/equinor/webviz-subsurface/pull/988) - `ParameterAnalysis` and `PropertyStatistics` - Changed to reading unsmry data from `.arrow` files and deprecated the usage of aggregated CSV files as input.
- [#986](https://github.com/equinor/webviz-subsurface/pull/986) - Avoid copying surfaces when using `MapViewerFMU` in non-portable mode.

## [0.2.11] - 2022-03-14

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,18 @@ def write_backing_store(
storage_key: str,
sim_surfaces: List[SurfaceFileInfo],
obs_surfaces: List[SurfaceFileInfo],
avoid_copying_surfaces: bool,
) -> None:
"""If avoid_copying_surfaces is True, the specified surfaces will NOT be copied
into the backing store, but will be referenced from their source locations.
Note that this is only useful when running in non-portable mode and will fail
in portable mode.
"""

timer = PerfTimer()

do_copy_surfs_into_store = not avoid_copying_surfaces

# All data for this provider will be stored inside a sub-directory
# given by the storage key
provider_dir = storage_dir / storage_key
Expand All @@ -82,44 +90,55 @@ def write_backing_store(
original_path_arr: List[str] = []

for surfinfo in sim_surfaces:
rel_path_in_store = _compose_rel_sim_surf_path(
real=surfinfo.real,
attribute=surfinfo.attribute,
name=surfinfo.name,
datestr=surfinfo.datestr,
extension=Path(surfinfo.path).suffix,
)
type_arr.append(SurfaceType.SIMULATED)
real_arr.append(surfinfo.real)
attribute_arr.append(surfinfo.attribute)
name_arr.append(surfinfo.name)
datestr_arr.append(surfinfo.datestr if surfinfo.datestr else "")
rel_path_arr.append(str(rel_path_in_store))
original_path_arr.append(surfinfo.path)

rel_path_in_store = ""
if do_copy_surfs_into_store:
rel_path_in_store = _compose_rel_sim_surf_pathstr(
real=surfinfo.real,
attribute=surfinfo.attribute,
name=surfinfo.name,
datestr=surfinfo.datestr,
extension=Path(surfinfo.path).suffix,
)
rel_path_arr.append(rel_path_in_store)

# We want to strip out observed surfaces without a matching simulated surface
valid_obs_surfaces = _find_observed_surfaces_corresponding_to_simulated(
obs_surfaces=obs_surfaces, sim_surfaces=sim_surfaces
)

for surfinfo in valid_obs_surfaces:
rel_path_in_store = _compose_rel_obs_surf_path(
attribute=surfinfo.attribute,
name=surfinfo.name,
datestr=surfinfo.datestr,
extension=Path(surfinfo.path).suffix,
)
type_arr.append(SurfaceType.OBSERVED)
real_arr.append(-1)
attribute_arr.append(surfinfo.attribute)
name_arr.append(surfinfo.name)
datestr_arr.append(surfinfo.datestr if surfinfo.datestr else "")
rel_path_arr.append(str(rel_path_in_store))
original_path_arr.append(surfinfo.path)

LOGGER.debug(f"Copying {len(original_path_arr)} surfaces into backing store...")
rel_path_in_store = ""
if do_copy_surfs_into_store:
rel_path_in_store = _compose_rel_obs_surf_pathstr(
attribute=surfinfo.attribute,
name=surfinfo.name,
datestr=surfinfo.datestr,
extension=Path(surfinfo.path).suffix,
)
rel_path_arr.append(rel_path_in_store)

timer.lap_s()
_copy_surfaces_into_provider_dir(original_path_arr, rel_path_arr, provider_dir)
if do_copy_surfs_into_store:
LOGGER.debug(
f"Copying {len(original_path_arr)} surfaces into backing store..."
)
_copy_surfaces_into_provider_dir(
original_path_arr, rel_path_arr, provider_dir
)
et_copy_s = timer.lap_s()

surface_inventory_df = pd.DataFrame(
Expand All @@ -137,10 +156,16 @@ def write_backing_store(
parquet_file_name = provider_dir / "surface_inventory.parquet"
surface_inventory_df.to_parquet(path=parquet_file_name)

LOGGER.debug(
f"Wrote surface backing store in: {timer.elapsed_s():.2f}s ("
f"copy={et_copy_s:.2f}s)"
)
if do_copy_surfs_into_store:
LOGGER.debug(
f"Wrote surface backing store in: {timer.elapsed_s():.2f}s ("
f"copy={et_copy_s:.2f}s)"
)
else:
LOGGER.debug(
f"Wrote surface backing store without copying surfaces in: "
f"{timer.elapsed_s():.2f}s"
)

@staticmethod
def from_backing_store(
Expand Down Expand Up @@ -365,7 +390,18 @@ def _locate_simulated_surfaces(
& (df[Col.REAL].isin(realizations))
]

return [self._provider_dir / rel_path for rel_path in df[Col.REL_PATH]]
df = df[[Col.REL_PATH, Col.ORIGINAL_PATH]]

# Return file name within backing store if the surface was copied there,
# otherwise return the original source file name
fn_list: List[str] = []
for _index, row in df.iterrows():
if row[Col.REL_PATH]:
fn_list.append(self._provider_dir / row[Col.REL_PATH])
else:
fn_list.append(row[Col.ORIGINAL_PATH])

return fn_list

def _locate_observed_surfaces(
self, attribute: str, name: str, datestr: str
Expand All @@ -381,7 +417,18 @@ def _locate_observed_surfaces(
& (df[Col.DATESTR] == datestr)
]

return [self._provider_dir / rel_path for rel_path in df[Col.REL_PATH]]
df = df[[Col.REL_PATH, Col.ORIGINAL_PATH]]

# Return file name within backing store if the surface was copied there,
# otherwise return the original source file name
fn_list: List[str] = []
for _index, row in df.iterrows():
if row[Col.REL_PATH]:
fn_list.append(self._provider_dir / row[Col.REL_PATH])
else:
fn_list.append(row[Col.ORIGINAL_PATH])

return fn_list


def _find_observed_surfaces_corresponding_to_simulated(
Expand Down Expand Up @@ -421,33 +468,33 @@ def _copy_surfaces_into_provider_dir(
# executor.map(shutil.copyfile, original_path_arr, full_dst_path_arr)


def _compose_rel_sim_surf_path(
def _compose_rel_sim_surf_pathstr(
real: int,
attribute: str,
name: str,
datestr: Optional[str],
extension: str,
) -> Path:
) -> str:
"""Compose path to simulated surface file, relative to provider's directory"""
if datestr:
fname = f"{real}--{name}--{attribute}--{datestr}{extension}"
else:
fname = f"{real}--{name}--{attribute}{extension}"
return Path(REL_SIM_DIR) / fname
return str(Path(REL_SIM_DIR) / fname)


def _compose_rel_obs_surf_path(
def _compose_rel_obs_surf_pathstr(
attribute: str,
name: str,
datestr: Optional[str],
extension: str,
) -> Path:
) -> str:
"""Compose path to observed surface file, relative to provider's directory"""
if datestr:
fname = f"{name}--{attribute}--{datestr}{extension}"
else:
fname = f"{name}--{attribute}{extension}"
return Path(REL_OBS_DIR) / fname
return str(Path(REL_OBS_DIR) / fname)


def _calc_statistic_across_surfaces(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def main() -> None:
# factory = EnsembleSurfaceProviderFactory.instance()

factory = EnsembleSurfaceProviderFactory(
root_storage_dir, allow_storage_writes=True
root_storage_dir, allow_storage_writes=True, avoid_copying_surfaces=False
)

provider: EnsembleSurfaceProvider = factory.create_from_ensemble_surface_files(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,15 @@


class EnsembleSurfaceProviderFactory(WebvizFactory):
def __init__(self, root_storage_folder: Path, allow_storage_writes: bool) -> None:
def __init__(
self,
root_storage_folder: Path,
allow_storage_writes: bool,
avoid_copying_surfaces: bool,
) -> None:
self._storage_dir = Path(root_storage_folder) / __name__
self._allow_storage_writes = allow_storage_writes
self._avoid_copying_surfaces = avoid_copying_surfaces

LOGGER.info(
f"EnsembleSurfaceProviderFactory init: storage_dir={self._storage_dir}"
Expand All @@ -41,8 +47,13 @@ def instance() -> "EnsembleSurfaceProviderFactory":
app_instance_info = WEBVIZ_FACTORY_REGISTRY.app_instance_info
storage_folder = app_instance_info.storage_folder
allow_writes = app_instance_info.run_mode != WebvizRunMode.PORTABLE
dont_copy_surfs = app_instance_info.run_mode == WebvizRunMode.NON_PORTABLE

factory = EnsembleSurfaceProviderFactory(storage_folder, allow_writes)
factory = EnsembleSurfaceProviderFactory(
root_storage_folder=storage_folder,
allow_storage_writes=allow_writes,
avoid_copying_surfaces=dont_copy_surfs,
)

# Store the factory object in the global factory registry
WEBVIZ_FACTORY_REGISTRY.set_factory(EnsembleSurfaceProviderFactory, factory)
Expand Down Expand Up @@ -80,11 +91,14 @@ def create_from_ensemble_surface_files(
obs_surface_files = discover_observed_surface_files(ens_path, attribute_filter)
et_discover_s = timer.lap_s()

# As an optimization, avoid copying the surfaces into the backing store,
# typically when we're running in non-portable mode
ProviderImplFile.write_backing_store(
self._storage_dir,
storage_key,
sim_surfaces=sim_surface_files,
obs_surfaces=obs_surface_files,
avoid_copying_surfaces=self._avoid_copying_surfaces,
)
et_write_s = timer.lap_s()

Expand Down

0 comments on commit 5dac1ed

Please sign in to comment.