From 8d5a8d6e4e2cb5013344179a13207f2e2acca8b6 Mon Sep 17 00:00:00 2001
From: Hans Kallekleiv <hakal@statoil.com>
Date: Tue, 28 May 2019 18:02:14 +0200
Subject: [PATCH] Reorganized data input (#19)

* Reorganized data input

* Removed comment string
---
 webviz_subsurface/datainput/__init__.py       | 73 ++-----------------
 webviz_subsurface/datainput/_history_match.py | 70 ++++++++++++++++++
 2 files changed, 77 insertions(+), 66 deletions(-)
 create mode 100644 webviz_subsurface/datainput/_history_match.py

diff --git a/webviz_subsurface/datainput/__init__.py b/webviz_subsurface/datainput/__init__.py
index 3841edf2c..5ea0b0ba1 100644
--- a/webviz_subsurface/datainput/__init__.py
+++ b/webviz_subsurface/datainput/__init__.py
@@ -1,70 +1,11 @@
-try:
-    import fmu.ensemble
-except ImportError:  # fmu.ensemble is an optional dependency, e.g.
-    pass             # for a portable webviz instance, it is never used.
+'''### _Subsurface data input_
 
-import pandas as pd
-from pathlib import Path
-from webviz_config.common_cache import cache
-from webviz_config.webviz_store import webvizstore
+Contains data processing functions used in the containers.
+Some of the scripts are dependent on FMU postprocessing scripts
+that will be made open source in the near future.
+'''
 
+from ._history_match import extract_mismatch, scratch_ensemble
 
-@cache.memoize(timeout=cache.TIMEOUT)
-def scratch_ensemble(ensemble_name, ensemble_path):
-    return fmu.ensemble.ScratchEnsemble(ensemble_name, ensemble_path)
 
-
-@cache.memoize(timeout=cache.TIMEOUT)
-@webvizstore
-def extract_mismatch(ens_paths, observation_file: Path) -> pd.DataFrame:
-    """Convert the fmu-ensemble mismatch dataframe into the the format
-    suitable for the interactive history match visualization.
-    """
-
-    list_ens = [scratch_ensemble(ensemble_name, path)
-                for (ensemble_name, path) in ens_paths]
-
-    ens_data = fmu.ensemble.EnsembleSet("HistoryMatch", list_ens)
-
-    df_mismatch = fmu.ensemble.Observations(str(observation_file))\
-                     .mismatch(ens_data)
-
-    df_mismatch['NORMALISED_MISMATCH'] = \
-        df_mismatch['L2'] / (df_mismatch['MEASERROR'] ** 2)
-
-    # Create a dataframe containing number of
-    # observation points within each observation key:
-    df_count = df_mismatch.groupby(['OBSKEY', 'REAL', 'ENSEMBLE'])\
-                          .size()\
-                          .to_frame('COUNT')\
-                          .reset_index()\
-                          .drop_duplicates(['OBSKEY'], keep='first')\
-                          .drop(columns=['REAL', 'ENSEMBLE'])
-
-    # 1) Sum the normalised misfit (grouped by obskey, misfit sign
-    #    realizaton and ensemble.
-    # 2) Pivot the dataframe such that instead of two rows wrt. positive and
-    #    negative misfit, we get two columns.
-    # 3) Replace NaN values with 0 (NaN happens e.g. for the summed negative
-    #    misfit if e.g. all misfit values are positive.
-    # 4) Drop the column name 0 (webviz don't need summed misfit over all
-    #    observation points with zero misfit :p)
-    # 5) Merge in the COUNT column.
-    # 6) Rename columns such that the columns from fmu.ensemble corresponds
-    #    to those used in the webviz history match visualization.
-    return df_mismatch.groupby(['OBSKEY', 'SIGN', 'REAL', 'ENSEMBLE'])\
-                      .sum()[['NORMALISED_MISMATCH']]\
-                      .pivot_table(index=['OBSKEY', 'REAL', 'ENSEMBLE'],
-                                   columns='SIGN',
-                                   values='NORMALISED_MISMATCH'
-                                   )\
-                      .reset_index()\
-                      .fillna(0)\
-                      .drop(columns=[0], errors='ignore')\
-                      .merge(df_count, on='OBSKEY', how='left')\
-                      .rename(columns={'OBSKEY': 'obs_group_name',
-                                       'REAL': 'realization',
-                                       'ENSEMBLE': 'ensemble_name',
-                                       'COUNT': 'number_data_points',
-                                       1: 'total_pos',
-                                       -1: 'total_neg'})
+__all__ = ['scratch_ensemble', 'extract_mismatch']
diff --git a/webviz_subsurface/datainput/_history_match.py b/webviz_subsurface/datainput/_history_match.py
new file mode 100644
index 000000000..3841edf2c
--- /dev/null
+++ b/webviz_subsurface/datainput/_history_match.py
@@ -0,0 +1,70 @@
+try:
+    import fmu.ensemble
+except ImportError:  # fmu.ensemble is an optional dependency, e.g.
+    pass             # for a portable webviz instance, it is never used.
+
+import pandas as pd
+from pathlib import Path
+from webviz_config.common_cache import cache
+from webviz_config.webviz_store import webvizstore
+
+
+@cache.memoize(timeout=cache.TIMEOUT)
+def scratch_ensemble(ensemble_name, ensemble_path):  # requires fmu.ensemble
+    return fmu.ensemble.ScratchEnsemble(ensemble_name, ensemble_path)
+
+
+@cache.memoize(timeout=cache.TIMEOUT)
+@webvizstore
+def extract_mismatch(ens_paths, observation_file: Path) -> pd.DataFrame:
+    """Convert the fmu-ensemble mismatch dataframe into the format
+    suitable for the interactive history match visualization.
+    """
+
+    list_ens = [scratch_ensemble(ensemble_name, path)
+                for (ensemble_name, path) in ens_paths]
+
+    ens_data = fmu.ensemble.EnsembleSet("HistoryMatch", list_ens)
+
+    df_mismatch = fmu.ensemble.Observations(str(observation_file))\
+                     .mismatch(ens_data)
+
+    df_mismatch['NORMALISED_MISMATCH'] = \
+        df_mismatch['L2'] / (df_mismatch['MEASERROR'] ** 2)
+
+    # Count the observation points per observation key (taken from the
+    # first (REAL, ENSEMBLE) group found for each key):
+    df_count = df_mismatch.groupby(['OBSKEY', 'REAL', 'ENSEMBLE'])\
+                          .size()\
+                          .to_frame('COUNT')\
+                          .reset_index()\
+                          .drop_duplicates(['OBSKEY'], keep='first')\
+                          .drop(columns=['REAL', 'ENSEMBLE'])
+
+    # 1) Sum the normalised misfit (grouped by obskey, misfit sign,
+    #    realization and ensemble).
+    # 2) Pivot the dataframe such that instead of two rows wrt. positive and
+    #    negative misfit, we get two columns.
+    # 3) Replace NaN values with 0 (NaN happens e.g. for the summed negative
+    #    misfit if all misfit values are positive).
+    # 4) Drop the column named 0 (webviz doesn't need the summed misfit over
+    #    observation points with zero misfit).
+    # 5) Merge in the COUNT column.
+    # 6) Rename columns such that the columns from fmu.ensemble correspond
+    #    to those used in the webviz history match visualization.
+    return df_mismatch.groupby(['OBSKEY', 'SIGN', 'REAL', 'ENSEMBLE'])\
+                      .sum()[['NORMALISED_MISMATCH']]\
+                      .pivot_table(index=['OBSKEY', 'REAL', 'ENSEMBLE'],
+                                   columns='SIGN',
+                                   values='NORMALISED_MISMATCH'
+                                   )\
+                      .reset_index()\
+                      .fillna(0)\
+                      .drop(columns=[0], errors='ignore')\
+                      .merge(df_count, on='OBSKEY', how='left')\
+                      .rename(columns={'OBSKEY': 'obs_group_name',
+                                       'REAL': 'realization',
+                                       'ENSEMBLE': 'ensemble_name',
+                                       'COUNT': 'number_data_points',
+                                       1: 'total_pos',
+                                       -1: 'total_neg'})