diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..61eba3185 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.eggs +*.egg-info +__pycache__ +node_modules +venv +.vscode +.pytest_cache +*.pyc +.DS_Store diff --git a/README.md b/README.md new file mode 100644 index 000000000..594b6f19a --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +# Webviz Subsurface configuration + + +### Introduction + +This repository contains subsurface-specific standard webviz containers, which are used as +plugins in [webviz-config](https://github.com/equinor/webviz-config). + + +### Installation + +As Dash is using Python3-only functionality, you should create a Python3 +virtual environment before installation. One way of doing this in Equinor is: +```bash +export PYTHON_VERSION=3.7.1 +source /prog/sdpsoft/env.sh + +PATH_TO_VENV='./my_new_venv' +python3 -m virtualenv $PATH_TO_VENV +source $PATH_TO_VENV/bin/activate +``` + +In order to install the utility, run +```bash +git clone git@github.com:Equinor/webviz-subsurface.git +cd webviz-subsurface +pip install . +``` + +### Usage + +For general usage, see the documentation on +[webviz-config](https://github.com/equinor/webviz-config). + +Take a look at [this configuration example](./examples/basic_example.yaml) for something subsurface-specific. + + +### Creating new elements + +If you are interested in creating new elements which can be configured through +the configuration file, take a look at the +[webviz-config contribution guide](https://github.com/equinor/webviz-config). + + +### Disclaimer + +This is a tool under heavy development. The current configuration file layout, +also for subsurface pages, will therefore see large changes. 
diff --git a/examples/basic_example.yaml b/examples/basic_example.yaml new file mode 100644 index 000000000..1a71537ea --- /dev/null +++ b/examples/basic_example.yaml @@ -0,0 +1,46 @@ +# This file demonstrates the most basic usage of webviz in an FMU setting +# The configuration file uses YAML (https://en.wikipedia.org/wiki/YAML). + +title: Reek Webviz Demonstration +username: some_username +password: some_password + +container_settings: + scratch_ensembles: + iter-0: /scratch/myfield/realization-*/iter-0 + iter-1: /scratch/myfield/realization-*/iter-1 + +pages: + + - title: Front page + content: + - Webviz created from configuration file + - title: Summary vectors + content: + - Some text... + - container: SummaryStats + ensemble: iter-0 + - ...some other text + - title: Parameter distribution + content: + - Some text... + - container: ParameterDistribution + ensemble: iter-0 + - ...some other text + - title: Subsurface map + content: + - container: SubsurfaceMap + ensemble: iter-0 + map_value: PERMX + flow_value: FLOWAT + time_step: 26 + - title: History match + content: + - container: HistoryMatch + ensembles: + - iter-0 + - iter-1 + observation_file: some_observation_file.yaml + - title: Another page + content: + - Some basic text with special characters... Åre, Smørbukk Sør. 
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..8febe5ffe --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +scipy>=1.2.1 diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..08ea85525 --- /dev/null +++ b/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup, find_packages + +setup( + name='webviz-subsurface', + version='0.1', + description='Webviz config containers for subsurface data', + url='https://github.com/equinor/webviz-subsurface', + author='R&T Equinor', + packages=find_packages(exclude=['tests']), + entry_points={ + 'webviz_config_containers': [ + 'SummaryStats = webviz_subsurface.containers:SummaryStats', + 'ParameterDistribution = webviz_subsurface.containers:ParameterDistribution', + 'DiskUsage = webviz_subsurface.containers:DiskUsage', + 'SubsurfaceMap = webviz_subsurface.containers:SubsurfaceMap', + 'HistoryMatch = webviz_subsurface.containers:HistoryMatch' + ] + }, + zip_safe=False +) diff --git a/webviz_subsurface/__init__.py b/webviz_subsurface/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/webviz_subsurface/containers/__init__.py b/webviz_subsurface/containers/__init__.py new file mode 100644 index 000000000..2121ec7b2 --- /dev/null +++ b/webviz_subsurface/containers/__init__.py @@ -0,0 +1,36 @@ +'''### _Subsurface specific containers_ + +These are containers relevant within subsurface workflows. Most of them +rely on the `scratch_ensembles` setting within `container_settings`. +I.e. 
you could have +```yaml +title: Reek Webviz Demonstration +username: some_username +password: some_password + +container_settings: + scratch_ensembles: + iter-0: /scratch/my_ensemble/realization-*/iter-0 + iter-1: /scratch/my_ensemble/realization-*/iter-1 + +pages: + + - title: Front page + content: + - container: SummaryStats + ensemble: iter-0 +``` +''' + +from ._summary_stats import SummaryStats +from ._parameter_distribution import ParameterDistribution +from ._disk_usage import DiskUsage +from ._subsurface_map import SubsurfaceMap +from ._history_match import HistoryMatch + + +__all__ = ['SummaryStats', + 'ParameterDistribution', + 'DiskUsage', + 'SubsurfaceMap', + 'HistoryMatch'] diff --git a/webviz_subsurface/containers/_disk_usage.py b/webviz_subsurface/containers/_disk_usage.py new file mode 100644 index 000000000..b6a91640d --- /dev/null +++ b/webviz_subsurface/containers/_disk_usage.py @@ -0,0 +1,114 @@ +import os +from uuid import uuid4 +import pandas as pd +import dash_html_components as html +import dash_core_components as dcc +from dash.dependencies import Input, Output +from webviz_config.webviz_store import webvizstore +from webviz_config.common_cache import cache + + +class DiskUsage: + '''### Disk usage + +This container adds functionality for standard visualization of disk usage in +FMU projects. It adds a dashboard element where the user can choose between +showing disk usage, per user, either as a pie chart or as a bar chart. + +* `scratch_dir`: Path to the directory you want to show disk usage for, e.g. + `/scratch/fmu`. +* `title`: Optional title for the container. 
+''' + + def __init__(self, app, scratch_dir: str, + title: str = 'Disk usage'): + + self.title = title + self.scratch_dir = scratch_dir + self.chart_id = 'chart-id-{}'.format(uuid4()) + self.plot_type_id = 'plot-type-id-{}'.format(uuid4()) + self.disk_usage = get_disk_usage(self.scratch_dir) + self.date = str(self.disk_usage['date'].unique()[0]) + self.users = self.disk_usage['userid'] + self.usage = self.disk_usage['usageKB']/(1024**2) + self.set_callbacks(app) + + @property + def layout(self): + return html.Div([ + html.H2(self.title), + html.P( + f'This is the disk usage on \ + {self.scratch_dir} per user, \ + as of {self.date}.'), + dcc.RadioItems( + id=self.plot_type_id, + options=[ + {'label': i, 'value': i} + for i in ['Pie chart', 'Bar chart']], + value='Pie chart'), + dcc.Graph( + id=self.chart_id, + config={ + 'displaylogo': False, + 'modeBarButtonsToRemove': ['sendDataToCloud'] + } + ) + ]) + + def set_callbacks(self, app): + @app.callback(Output(self.chart_id, 'figure'), + [Input(self.plot_type_id, 'value')]) + def update_plot(plot_type): + if plot_type == 'Pie chart': + data = [{ + 'values': self.usage, + 'labels': self.users, + 'text': (self.usage).map('{:.2f} GB'.format), + 'textinfo': 'label', + 'textposition': 'inside', + 'hoverinfo': 'label+text', + 'type': 'pie' + }] + layout = {} + + elif plot_type == 'Bar chart': + data = [{ + 'y': self.usage, + 'x': self.users, + 'text': (self.usage).map('{:.2f} GB'.format), + 'hoverinfo': 'x+text', + 'type': 'bar' + }] + layout = { + 'yaxis': { + 'title': 'Usage in Gigabytes', + 'family': 'Equinor' + }, + 'xaxis': { + 'title': 'User name', + 'family': 'Equinor' + }, + } + + layout['height'] = 800 + layout['width'] = 1000 + layout['font'] = {'family': 'Equinor'} + layout['hoverlabel'] = {'font': {'family': 'Equinor'}} + + return {'data': data, 'layout': layout} + + def add_webvizstore(self): + return [(get_disk_usage, [{'scratch_dir': self.scratch_dir}])] + + +@cache.memoize(timeout=cache.TIMEOUT) 
+@webvizstore +def get_disk_usage(scratch_dir) -> pd.DataFrame: + try: + df = pd.read_csv(os.path.join(scratch_dir, 'disk_usage.csv')) + except FileNotFoundError: + raise FileNotFoundError(f'No disk usage file found at {scratch_dir}') + + last_date = sorted(list(df['date'].unique()))[-1] + return df.loc[df['date'] == last_date] diff --git a/webviz_subsurface/containers/_history_match.py b/webviz_subsurface/containers/_history_match.py new file mode 100644 index 000000000..39c99e810 --- /dev/null +++ b/webviz_subsurface/containers/_history_match.py @@ -0,0 +1,143 @@ +import json +import numpy as np +import pandas as pd +from uuid import uuid4 +from pathlib import Path +from scipy.stats import chi2 +import dash_html_components as html + +from webviz_subsurface_components import HistoryMatch +from webviz_config.webviz_store import webvizstore +from webviz_config.common_cache import cache + +from ..datainput import extract_mismatch + + +class HistoryMatch: + '''### History match + +This container visualizes the quality of the history match. + +* `ensembles`: List of the ensembles in `container_settings` to visualize. +* `observation_file`: Path to the observation `.yaml` file. +* `title`: Optional title for the container. 
+''' + + def __init__(self, container_settings, ensembles, observation_file: Path, + title: str='History Match'): + + self.observation_file = observation_file + self.title = title + self.ens_paths = tuple((ens, + container_settings['scratch_ensembles'][ens]) + for ens in ensembles) + + data = extract_mismatch(self.ens_paths, self.observation_file) + self.hm_data = json.dumps(self._prepareData(data)) + + def add_webvizstore(self): + return [(extract_mismatch, [{'ens_paths': self.ens_paths, + 'observation_file': self.observation_file + }])] + + def _prepareData(self, data): + data = data.copy().reset_index() + + ensemble_labels = data.ensemble_name.unique().tolist() + num_obs_groups = len(data.obs_group_name.unique()) + + data['avg_pos'] = data['total_pos'] / data['number_data_points'] + data['avg_neg'] = data['total_neg'] / data['number_data_points'] + + iterations = [] + for ensemble in ensemble_labels: + df = data[data.ensemble_name == ensemble] + iterations.append(df.groupby('obs_group_name').mean()) + + sorted_iterations = self._sortIterations(iterations) + + iterations_dict = self._iterations_to_dict(sorted_iterations, + ensemble_labels) + + confidence_sorted = _get_sorted_edges(num_obs_groups) + confidence_unsorted = _get_unsorted_edges() + + data = {} + data['iterations'] = iterations_dict + data['confidence_interval_sorted'] = confidence_sorted + data['confidence_interval_unsorted'] = confidence_unsorted + + return data + + def _sortIterations(self, iterations): + sorted_data = [] + + for df in iterations: + sorted_df = df.copy() + + sorted_data.append( + sorted_df.assign(f=sorted_df['avg_pos'] + sorted_df['avg_neg']) + .sort_values('f', ascending=False) + .drop('f', axis=1) + ) + + return sorted_data + + def _iterations_to_dict(self, iterations, labels): + retval = [] + + for iteration, label in zip(iterations, labels): + retval.append({ + 'name': label, + 'positive': iteration['avg_pos'].tolist(), + 'negative': iteration['avg_neg'].tolist(), + 'labels': 
iteration.index.tolist() + }) + + return retval + + @property + def layout(self): + return html.Div([ + html.H2(self.title), + HistoryMatch(id='hm', data=self.hm_data) + ]) + + +def _get_unsorted_edges(): + """P10 - P90 unsorted edge coordinates""" + + retval = { + 'low': chi2.ppf(0.1, 1), + 'high': chi2.ppf(0.9, 1) + } + + return retval + + +def _get_sorted_edges(number_observation_groups): + """P10 - P90 sorted edge coordinates""" + + monte_carlo_iterations = 100000 + + sorted_values = np.empty((number_observation_groups, + monte_carlo_iterations)) + + for i in range(monte_carlo_iterations): + sorted_values[:, i] = np.sort(np.random.chisquare( + df=1, + size=number_observation_groups)) + + sorted_values = np.flip(sorted_values, 0) + + P10 = np.percentile(sorted_values, 90, axis=1) + P90 = np.percentile(sorted_values, 10, axis=1) + + # Dictionary with two arrays (P10, P90). Each array of length equal + # to number of observation groups i.e. number of items along y axis. + # These values are to be used for drawing the stair stepped + # sorted P10-P90 area: + + coordinates = {'low': list(P10), 'high': list(P90)} + + return coordinates diff --git a/webviz_subsurface/containers/_parameter_distribution.py b/webviz_subsurface/containers/_parameter_distribution.py new file mode 100644 index 000000000..c2a51dc85 --- /dev/null +++ b/webviz_subsurface/containers/_parameter_distribution.py @@ -0,0 +1,135 @@ +from uuid import uuid4 +import pandas as pd +import dash_html_components as html +import dash_core_components as dcc +from dash.dependencies import Input, Output +from webviz_plotly.graph_objs import FanChart +from webviz_config.webviz_store import webvizstore +from webviz_config.common_cache import cache +from ..datainput import scratch_ensemble + + +class ParameterDistribution: + '''### Parameter distribution + +This container shows parameter distribution as histogram, +and correlation between the parameters as a correlation matrix. 
+ +* `ensemble`: Which ensemble in `container_settings` to visualize. +* `title`: Optional title for the container. +''' + + def __init__(self, app, container_settings, ensemble, + title: str = 'Parameter Distribution'): + + self.title = title + + self.dropdown_vector_id = 'dropdown-vector-{}'.format(uuid4()) + self.radio_plot_type_id = 'radio-plot-type-{}'.format(uuid4()) + self.chart_id = 'chart-id-{}'.format(uuid4()) + self.histogram_div_id = 'histogram-div-{}'.format(uuid4()) + + # Finding all parameters: + self.ensemble_path = container_settings['scratch_ensembles'][ensemble] + self.parameter_columns = sorted(list( + get_parameters(self.ensemble_path).columns)) + + self.set_callbacks(app) + + @property + def layout(self): + return html.Div([ + html.H2(self.title), + html.P('Plot type:', style={'font-weight': 'bold'}), + dcc.RadioItems(id=self.radio_plot_type_id, + options=[{'label': i, 'value': i} for i in + ['Histogram', 'Pairwise correlation']], + value='Histogram'), + html.Div(id=self.histogram_div_id, + children=[ + html.P('Parameter:', + style={'font-weight': 'bold'}), + dcc.Dropdown(id=self.dropdown_vector_id, + clearable=False, + options=[{'label': i, 'value': i} for + i in self.parameter_columns], + value=self.parameter_columns[0]), + ]), + dcc.Graph(id=self.chart_id, + config={ + 'displaylogo': False, + 'modeBarButtonsToRemove': ['sendDataToCloud'] + } + ) + ]) + + def set_callbacks(self, app): + @app.callback(Output(self.chart_id, 'figure'), + [Input(self.dropdown_vector_id, 'value'), + Input(self.radio_plot_type_id, 'value')]) + def update_plot(parameter, plot_type): + if plot_type == 'Histogram': + return render_histogram(self.ensemble_path, parameter) + if plot_type == 'Pairwise correlation': + return render_matrix(self.ensemble_path) + + @app.callback(Output(self.histogram_div_id, 'style'), + [Input(self.radio_plot_type_id, 'value')]) + def toggle_parameter_selector(plot_type): + if plot_type == 'Histogram': + return {'display': 'block'} + if 
plot_type == 'Pairwise correlation': + return {'display': 'none'} + + def add_webvizstore(self): + return [(get_parameters, [{'ensemble_path': self.ensemble_path}])] + + +@cache.memoize(timeout=cache.TIMEOUT) +@webvizstore +def get_parameters(ensemble_path) -> pd.DataFrame: + ens = scratch_ensemble('', ensemble_path) + + return ens.parameters + + +@cache.memoize(timeout=cache.TIMEOUT) +def render_histogram(ensemble_path, parameter): + data = { + 'x': get_parameters(ensemble_path)[parameter], + 'type': 'histogram' + } + + layout = { + 'bargap': 0.05, + 'font': {'family': 'Equinor'}, + 'xaxis': {'family': 'Equinor'}, + 'yaxis': {'family': 'Equinor'}, + 'hoverlabel': {'font': {'family': 'Equinor'}} + } + + return {'data': [data], 'layout': layout} + + +@cache.memoize(timeout=cache.TIMEOUT) +def render_matrix(ensemble_path): + + data = get_parameters(ensemble_path) + values = list(data.corr().values) + + data = { + 'type': 'heatmap', + 'x': data.columns, + 'y': data.columns, + 'z': values + } + + layout = { + 'margin': {'l': 200}, + 'font': {'family': 'Equinor'}, + 'xaxis': {'family': 'Equinor'}, + 'yaxis': {'family': 'Equinor'}, + 'hoverlabel': {'font': {'family': 'Equinor'}} + } + + return {'data': [data], 'layout': layout} diff --git a/webviz_subsurface/containers/_subsurface_map.py b/webviz_subsurface/containers/_subsurface_map.py new file mode 100644 index 000000000..0484f0dc5 --- /dev/null +++ b/webviz_subsurface/containers/_subsurface_map.py @@ -0,0 +1,71 @@ +from uuid import uuid4 +import pandas as pd +import dash_html_components as html +from webviz_config.webviz_store import webvizstore +from webviz_config.common_cache import cache +from webviz_subsurface_components import Map +from ..datainput import scratch_ensemble + + +class SubsurfaceMap: + '''### Subsurface map + +This container visualizes the subsurface. Currently only supporting reservoir +model grid maps. 
In addition to showing a map, it can visualize the flow pattern +in the simulation output using streamlines. + +* `ensemble`: Which ensemble in `container_settings` to visualize. +* `map_value`: Which property to show in the map (e.g. `PERMX`). +* `flow_value`: Which property to use for the streamlines animation + (e.g. `FLOWAT`). +* `time_step`: Which report or time step to use in the simulation output. +* `title`: Optional title for the container. +''' + + def __init__(self, container_settings, ensemble, map_value: str, + flow_value: str, time_step, title: str = 'Subsurface map'): + + self.title = title + self.map_id = 'map-{}'.format(uuid4()) + self.map_value = map_value + self.flow_value = flow_value + self.time_step = time_step + + self.ensemble_path = container_settings['scratch_ensembles'][ensemble] + self.map_data = get_map_data(self.ensemble_path, self.map_value, + self.flow_value, self.time_step) + + @property + def layout(self): + return html.Div([ + html.H2(self.title), + Map(id=self.map_id, data=self.map_data.to_json()) + ]) + + def add_webvizstore(self): + return [(get_map_data, [{'ensemble_path': self.ensemble_path, + 'map_value': self.map_value, + 'flow_value': self.flow_value, + 'time_step': self.time_step}])] + + +@cache.memoize(timeout=cache.TIMEOUT) +@webvizstore +def get_map_data(ensemble_path, map_value, flow_value, + time_step) -> pd.DataFrame: + + ens = scratch_ensemble('', ensemble_path) + + grid = ens.get_eclgrid([map_value, f'{flow_value}I+', f'{flow_value}J+'], + report=time_step) + + grid['value'] = grid[map_value] + grid['FLOWI+'] = grid[f'{flow_value}I+'] + grid['FLOWJ+'] = grid[f'{flow_value}J+'] + + # Webviz map component uses different corner point terminology than libecl + for (new, old) in [('x0', 'x1'), ('x1', 'x2'), ('x2', 'x4'), + ('y0', 'y1'), ('y1', 'y2'), ('y2', 'y4')]: + grid[new] = grid[old] + + return grid diff --git a/webviz_subsurface/containers/_summary_stats.py b/webviz_subsurface/containers/_summary_stats.py new file 
mode 100644 index 000000000..53cc69131 --- /dev/null +++ b/webviz_subsurface/containers/_summary_stats.py @@ -0,0 +1,137 @@ +from uuid import uuid4 +import pandas as pd +import dash_html_components as html +import dash_core_components as dcc +from dash.dependencies import Input, Output +from webviz_plotly.graph_objs import FanChart +from webviz_config.webviz_store import webvizstore +from webviz_config.common_cache import cache +from ..datainput import scratch_ensemble + + +class SummaryStats: + '''### Summary statistics + +This container visualizes simulation profiles, both per realization and +statistical plots (min, max, mean, p10, p90). + +* `ensemble`: Which ensemble in `container_settings` to visualize. +* `sampling`: Optional. Either `monthly` or `yearly`. Default is `monthly`. +* `title`: Optional title for the container. +''' + + def __init__(self, app, container_settings, ensemble, + sampling: str='monthly', title: str='Simulation time series'): + + self.title = title + self.dropwdown_vector_id = 'dropdown-vector-{}'.format(uuid4()) + self.sampling = sampling + self.radio_plot_type_id = 'radio-plot-type-{}'.format(uuid4()) + self.chart_id = 'chart-id-{}'.format(uuid4()) + + # Finding all summary vectors: + self.ensemble_path = container_settings['scratch_ensembles'][ensemble] + + self.smry_columns = sorted(list(get_summary_data(self.ensemble_path, + self.sampling) + .drop(columns=['DATE', 'REAL']) + .columns)) + + self.set_callbacks(app) + + @property + def layout(self): + return html.Div([ + html.H2(self.title), + html.P('Summary Vector:', style={'font-weight': 'bold'}), + dcc.Dropdown(id=self.dropwdown_vector_id, + clearable=False, + options=[{'label': i, 'value': i} + for i in self.smry_columns], + value=self.smry_columns[0]), + html.P('Plot type:', style={'font-weight': 'bold'}), + dcc.RadioItems(id=self.radio_plot_type_id, + options=[{'label': i, 'value': i} + for i in ['Realizations', 'Statistics']], + value='Realizations'), + 
dcc.Graph(id=self.chart_id, + config={ + 'displaylogo': False, + 'modeBarButtonsToRemove': ['sendDataToCloud'] + } + ) + ]) + + def set_callbacks(self, app): + @app.callback(Output(self.chart_id, 'figure'), + [Input(self.dropwdown_vector_id, 'value'), + Input(self.radio_plot_type_id, 'value')]) + def update_plot(vector, summary_plot_type): + if summary_plot_type == 'Realizations': + return render_realization_plot( + self.ensemble_path, + self.sampling, vector) + if summary_plot_type == 'Statistics': + return render_stat_plot( + self.ensemble_path, + self.sampling, vector) + + def add_webvizstore(self): + return [(get_summary_data, [{'ensemble_path': self.ensemble_path, + 'sampling': self.sampling, + 'statistics': False}]), + (get_summary_data, [{'ensemble_path': self.ensemble_path, + 'sampling': self.sampling, + 'statistics': True}])] + + +@cache.memoize(timeout=cache.TIMEOUT) +@webvizstore +def get_summary_data(ensemble_path, sampling, + statistics=False) -> pd.DataFrame: + + ens = scratch_ensemble('', ensemble_path) + if statistics: + return ens.get_smry_stats(time_index=sampling) + else: + return ens.get_smry(time_index=sampling) + + +@cache.memoize(timeout=cache.TIMEOUT) +def render_realization_plot(ensemble_path, sampling, vector): + + data = get_summary_data(ensemble_path, + sampling)[['REAL', 'DATE', vector]] + + traces = [{ + 'x': df['DATE'], + 'customdata': df['REAL'], + 'y': df[vector], + 'name': name, + 'type': 'line' + } for name, df in data.groupby('REAL') if name != 'DATE'] + + layout = { + 'hovermode': 'closest', + 'barmode': 'overlay', + 'bargap': 0.05, + 'xaxis': {'title': 'Date', 'family': 'Equinor'}, + 'yaxis': {'title': vector, 'family': 'Equinor'}, + 'font': {'family': 'Equinor'}, + 'hoverlabel': {'font': {'family': 'Equinor'}}, + } + + return {'data': traces, 'layout': layout} + + +@cache.memoize(timeout=cache.TIMEOUT) +def render_stat_plot(ensemble_path, sampling, vector): + + data = get_summary_data(ensemble_path, sampling, + 
+ statistics=True)[vector].unstack().transpose() + + data['name'] = vector + data.rename(index=str, inplace=True, + columns={"minimum": "min", "maximum": "max"}) + + return FanChart(data.iterrows()) diff --git a/webviz_subsurface/datainput/__init__.py b/webviz_subsurface/datainput/__init__.py new file mode 100644 index 000000000..efbb2f1e6 --- /dev/null +++ b/webviz_subsurface/datainput/__init__.py @@ -0,0 +1,70 @@ +try: + import fmu.ensemble +except ImportError: # fmu.ensemble is an optional dependency, e.g. + pass # for a portable webviz instance, it is never used. + +import pandas as pd +from pathlib import Path +from webviz_config.common_cache import cache +from webviz_config.webviz_store import webvizstore + + +@cache.memoize(timeout=cache.TIMEOUT) +def scratch_ensemble(ensemble_name, ensemble_path): + return fmu.ensemble.ScratchEnsemble(ensemble_name, ensemble_path) + + +@cache.memoize(timeout=cache.TIMEOUT) +@webvizstore +def extract_mismatch(ens_paths, observation_file: Path) -> pd.DataFrame: + """Convert the fmu-ensemble mismatch dataframe into the format + suitable for the interactive history match visualization. + """ + + list_ens = [scratch_ensemble(ensemble_name, path) + for (ensemble_name, path) in ens_paths] + + ens_data = fmu.ensemble.EnsembleSet("HistoryMatch", list_ens) + + df_mismatch = fmu.ensemble.Observations(str(observation_file))\ + .mismatch(ens_data) + + df_mismatch['NORMALISED_MISMATCH'] = \ + df_mismatch['L2'] / (df_mismatch['MEASERROR'] ** 2) + + # Create a dataframe containing number of + # observation points within each observation key: + df_count = df_mismatch.groupby(['OBSKEY', 'REAL', 'ENSEMBLE'])\ + .size()\ + .to_frame('COUNT')\ + .reset_index()\ + .drop_duplicates(['OBSKEY'], keep='first')\ + .drop(columns=['REAL', 'ENSEMBLE']) + + # 1) Sum the normalised misfit (grouped by obskey, misfit sign, + # realization and ensemble). + # 2) Pivot the dataframe such that instead of two rows wrt. 
positive and + # negative misfit, we get two columns. + # 3) Replace NaN values with 0 (NaN happens e.g. for the summed negative + # misfit if e.g. all misfit values are positive). + # 4) Drop the column name 0 (webviz doesn't need summed misfit over all + # observation points with zero misfit :p) + # 5) Merge in the COUNT column. + # 6) Rename columns such that the columns from fmu.ensemble correspond + # to those used in the webviz history match visualization. + return df_mismatch.groupby(['OBSKEY', 'SIGN', 'REAL', 'ENSEMBLE'])\ + .sum()[['NORMALISED_MISMATCH']]\ + .pivot_table(index=['OBSKEY', 'REAL', 'ENSEMBLE'], + columns='SIGN', + values='NORMALISED_MISMATCH' + )\ + .reset_index()\ + .fillna(0)\ + .drop(columns=[0])\ + .merge(df_count, on='OBSKEY', how='left')\ + .rename(columns={'OBSKEY': 'obs_group_name', + 'REAL': 'realization', + 'ENSEMBLE': 'ensemble_name', + 'COUNT': 'number_data_points', + 1: 'total_pos', + -1: 'total_neg'})