More flexibility in GroupTree (#1138)

* Two new parameters in GroupTree for more flexibility, terminal_node and excl_well_startswith * Implemented new function get_filtered_dataframe in gruptree_model * New input excl_well_endswith * Improved Exception message for injection nodes in BRANPROP trees * Started on unit tests for the gruptree model * Fixed CI workflow issues * Improved tests with fixture creating the gruptree model * Improved handling of injection for BRANPROP nodes * Made terminal_node optional in get_filtered_dataframe * New input parameter tree_type which is GRUPTREE by default, and some other improvements * Relaxed the requirements on summary vectors * New StrEnum DataType for oilrate, gasrate etc * Eased the requirement on node summary vectors, they are now optional * New type EdgeOrNode * Small docstring update * Implemented mock GruptreeModel class with some new tests * Allowed tree_type to be defaulted in gruptree_model, in which case the tree is automatically selected * Changelog entry * Small update to comment * Set back scipy version in setup.py and installed v 1.9.2 in CI workflow * Updated CI workflow Co-authored-by: Øyvind Lind-Johansen <[email protected]>
equinor · Oct 26, 2022 · 7c35594 · 7c35594
1 parent 1ad6de4
commit 7c35594
Show file tree

Hide file tree

Showing 9 changed files with 459 additions and 119 deletions.
diff --git a/.github/workflows/subsurface.yml b/.github/workflows/subsurface.yml
@@ -52,10 +52,13 @@ jobs:
         pip install "bleach<5"  # https://github.com/equinor/webviz-config/issues/586
         pip install "werkzeug<2.1"  # ...while waiting for https://github.com/plotly/dash/issues/1992
         pip install "selenium<4.3"  # breaking change in selenium==4.3
+        pip install "scipy<1.9.3"   # breaking change in scipy==1.9.3
+        pip install "pytest<7.2.0"
+        pip install "pytest-xdist<3.0"
         pip install .
-        
+
         # Testing against our latest release (including pre-releases)
-        pip install --pre --upgrade webviz-config webviz-core-components webviz-subsurface-components 
+        pip install --pre --upgrade webviz-config webviz-core-components webviz-subsurface-components
 
     - name: 📦 Install test dependencies
       run: |

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED] - YYYY-MM-DD
 
+### Changed
+- [#1138](https://github.com/equinor/webviz-subsurface/pull/1138) - More flexibility in the input to the `GroupTree` plugin. New input parameters `terminal_node`, `tree_type`, `excl_well_startswith` and `excl_well_endswith`. Relaxed requirements on summary input.
+
 ## [0.2.15] - 2022-10-10
 
 ### Added

diff --git a/tests/data/gruptree.csv b/tests/data/gruptree.csv
@@ -0,0 +1,6 @@
+DATE,CHILD,KEYWORD,PARENT,VFP_TABLE,ALQ,ALQ_SURFACE_DENSITY,TERMINAL_PRESSURE,AS_CHOKE,ADD_GAS_LIFT_GAS,CHOKE_GROUP,SOURCE_SINK_GROUP,NETWORK_VALUE_TYPE
+2023-01-01,FIELD,GRUPTREE,,,,,,,,,,
+2023-01-01,NODE,GRUPTREE,FIELD,,,,,,,,,
+2023-01-01,WELL,WELSPECS,NODE,,,,,,,,,
+2023-01-01,FIELD,BRANPROP,,,,,,,,,,
+2023-01-01,NODE,BRANPROP,FIELD,9999.0,0.0,NONE,25.0,NO,NO,,,PROD
diff --git a/tests/unit_tests/model_tests/test_gruptree_model.py b/tests/unit_tests/model_tests/test_gruptree_model.py
@@ -0,0 +1,92 @@
+import datetime
+from pathlib import Path
+from typing import Optional
+
+import pandas as pd
+import pytest
+from pandas._testing import assert_frame_equal
+
+from webviz_subsurface._models.gruptree_model import GruptreeModel, TreeType
+
+CHECK_COLUMNS = ["DATE", "CHILD", "KEYWORD", "PARENT"]
+ENSEMBLE = "01_drogon_ahm"
+GRUPTREE_FILE = "share/results/tables/gruptree.csv"
+
+
+@pytest.fixture(name="gruptree_model")
+def fixture_model(testdata_folder) -> GruptreeModel:
+    """Return a GruptreeModel for the test ensemble with tree_type GRUPTREE.
+
+    The ensemble path is built below the `testdata_folder` fixture and uses
+    the wildcard pattern `realization-*` combined with `iter-0`.
+    """
+    ens_path = Path(testdata_folder) / ENSEMBLE / "realization-*" / "iter-0"
+    return GruptreeModel(
+        ens_name="iter-0",
+        ens_path=ens_path,
+        gruptree_file=GRUPTREE_FILE,
+        tree_type="GRUPTREE",
+    )
+
+
+# Mock class that loads local csv file
+class MockGruptreeModel(GruptreeModel):
+    """GruptreeModel variant that reads the local `tests/data/gruptree.csv` file.
+
+    The parent constructor is deliberately not called. Only `_tree_type` and
+    `_dataframe` are set, and the file to read is passed directly to
+    `read_ensemble_gruptree` through its `df_files` argument.
+    """
+
+    # pylint: disable=super-init-not-called
+    def __init__(self, tree_type: Optional[TreeType] = None):
+        self._tree_type = tree_type
+        # Anchor the csv path to this test file (tests/unit_tests/model_tests/)
+        # instead of the current working directory, so that the tests do not
+        # depend on where pytest is started from.
+        csv_path = Path(__file__).resolve().parents[2] / "data" / "gruptree.csv"
+        df_files = pd.DataFrame([{"REAL": 0, "FULLPATH": str(csv_path)}])
+        self._dataframe = self.read_ensemble_gruptree(df_files=df_files)
+
+
+@pytest.mark.usefixtures("app")
+def test_gruptree_model_init(testdata_folder, gruptree_model: GruptreeModel):
+    """Compare the model dataframe with the gruptree csv file of realization-0."""
+
+    # Check that there is only one REAL (means that the gruptree is
+    # the same for all realizations)
+    assert gruptree_model.dataframe["REAL"].nunique() == 1
+
+    # Load gruptree table from realization-0 and compare with
+    # the dataframe from the gruptree_model
+    r0_path = f"{testdata_folder}/{ENSEMBLE}/realization-0/iter-0/{GRUPTREE_FILE}"
+    exp_df = pd.read_csv(r0_path)
+    exp_df["DATE"] = pd.to_datetime(exp_df["DATE"])
+    # Replace missing values with None before comparing the frames
+    exp_df = exp_df.where(pd.notnull(exp_df), None)
+
+    assert_frame_equal(gruptree_model.dataframe[CHECK_COLUMNS], exp_df[CHECK_COLUMNS])
+
+
+@pytest.mark.usefixtures("app")
+def test_get_filtered_dataframe(gruptree_model: GruptreeModel):
+    """Check terminal node filtering and exclusion of wells by name."""
+    last_date = datetime.datetime(year=2018, month=11, day=17)
+
+    # Terminal node different than FIELD: the rows on the last date of the
+    # filtered dataframe are compared with the expected OP branch.
+    op_branch = gruptree_model.get_filtered_dataframe(terminal_node="OP")
+    is_last_date = op_branch["DATE"] == op_branch["DATE"].max()
+    op_branch_last_date = op_branch[is_last_date].reset_index()
+
+    expected_rows = [
+        ["OP", "GRUPTREE", "FIELD"],
+        ["A1", "WELSPECS", "OP"],
+        ["A2", "WELSPECS", "OP"],
+        ["A3", "WELSPECS", "OP"],
+        ["A4", "WELSPECS", "OP"],
+    ]
+    exp_filtered_df = pd.DataFrame(
+        columns=["DATE", "CHILD", "KEYWORD", "PARENT"],
+        data=[[last_date, *row] for row in expected_rows],
+    )
+    assert_frame_equal(op_branch_last_date[CHECK_COLUMNS], exp_filtered_df)
+
+    # Test excl_well_startswith and excl_well_endswith
+    remaining_children = gruptree_model.get_filtered_dataframe(
+        excl_well_startswith=["R_"],
+        excl_well_endswith=["3", "5"],
+    )["CHILD"].unique()
+    assert set(remaining_children) == {
+        "FIELD",
+        "OP",
+        "RFT",
+        "WI",
+        "A1",
+        "A2",
+        "A4",
+        "A6",
+    }
+
+
+def test_tree_type_filtering():
+    """Check which KEYWORD rows are removed for each tree_type setting."""
+    # (tree_type, keyword that must be absent from the dataframe).
+    # If tree_type is defaulted (None) then the BRANPROP tree is selected.
+    cases = (
+        (TreeType.GRUPTREE, "BRANPROP"),
+        (TreeType.BRANPROP, "GRUPTREE"),
+        (None, "GRUPTREE"),
+    )
+    for tree_type, absent_keyword in cases:
+        mock_model = MockGruptreeModel(tree_type=tree_type)
+        assert absent_keyword not in mock_model.dataframe["KEYWORD"].unique()
diff --git a/tests/unit_tests/plugin_tests/test_grouptree.py b/tests/unit_tests/plugin_tests/test_grouptree.py
@@ -170,7 +170,7 @@ def test_add_nodetype(
         "CHILD"
     ].unique()
 
-    output = add_nodetype(gruptree_df, provider, wells)
+    output = add_nodetype(gruptree_df, provider, wells, "FIELD")
     pd.testing.assert_frame_equal(
         output[columns_to_check], expected_df[columns_to_check]
     )
diff --git a/webviz_subsurface/_models/gruptree_model.py b/webviz_subsurface/_models/gruptree_model.py
@@ -1,12 +1,18 @@
 from pathlib import Path
-from typing import Callable, Dict, List, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 
 import pandas as pd
+from webviz_config.utils import StrEnum
 from webviz_config.webviz_store import webvizstore
 
 from webviz_subsurface._datainput.fmu_input import scratch_ensemble
 
 
+class TreeType(StrEnum):
+    """Tree types that can be selected in the gruptree data.
+
+    The member values are compared with the entries of the KEYWORD column
+    when the gruptree files are read.
+    """
+
+    GRUPTREE = "GRUPTREE"
+    BRANPROP = "BRANPROP"
+
+
 class GruptreeModel:
     """Facilitates loading of gruptree tables. Can be reused in all
     plugins that are using grouptree data and extended with additional
@@ -18,13 +24,14 @@ def __init__(
         ens_name: str,
         ens_path: Path,
         gruptree_file: str,
-        remove_gruptree_if_branprop: bool = True,
+        tree_type: Optional[str] = None,
     ):
         self._ens_name = ens_name
         self._ens_path = ens_path
         self._gruptree_file = gruptree_file
-        self._remove_gruptree_if_branprop = remove_gruptree_if_branprop
+        self._tree_type = TreeType(tree_type) if tree_type is not None else None
         self._dataframe = self.read_ensemble_gruptree()
+
         self._gruptrees_are_equal_over_reals = (
             self._dataframe["REAL"].nunique() == 1
             if not self._dataframe.empty
@@ -46,6 +53,74 @@ def dataframe(self) -> pd.DataFrame:
         """
         return self._dataframe
 
+    def get_filtered_dataframe(
+        self,
+        terminal_node: Optional[str] = None,
+        excl_well_startswith: Optional[List[str]] = None,
+        excl_well_endswith: Optional[List[str]] = None,
+    ) -> pd.DataFrame:
+        """Return a copy of the gruptree rows that pass the given filters.
+
+        - terminal_node: keeps the terminal node and all nodes below it in the
+        tree (for all realizations and dates). A ValueError is raised if the
+        node is not found in the CHILD column.
+        - excl_well_startswith: drops WELSPECS rows where CHILD starts with any
+        of the entries in the list.
+        - excl_well_endswith: drops WELSPECS rows where CHILD ends with any
+        of the entries in the list.
+
+        Arguments that are None are not used for filtering.
+        """
+        all_rows = self._dataframe
+        selected = all_rows
+
+        if terminal_node is not None:
+            if terminal_node not in all_rows["CHILD"].unique():
+                raise ValueError(
+                    f"Terminal node '{terminal_node}' not found in 'CHILD' column "
+                    "of the gruptree data."
+                )
+            branch_nodes = self._get_branch_nodes(terminal_node)
+            selected = all_rows[all_rows["CHILD"].isin(branch_nodes)]
+
+        # The prefix filter is applied before the suffix filter. The entries are
+        # passed as a tuple so that a match on any of them excludes the well.
+        well_name_filters = (
+            (excl_well_startswith, "startswith"),
+            (excl_well_endswith, "endswith"),
+        )
+        for excluded, str_method in well_name_filters:
+            if excluded is None:
+                continue
+            is_well = selected["KEYWORD"] == "WELSPECS"
+            name_check = getattr(selected["CHILD"].str, str_method)
+            name_matches = name_check(tuple(excluded))
+            # Keep all non-well rows and the wells that do not match
+            selected = selected[~is_well | ~name_matches]
+
+        return selected.copy()
+
+    def _get_branch_nodes(self, terminal_node: str) -> List[str]:
+        """Return the terminal node and all nodes below it in the tree.
+
+        The nodes are returned in depth-first order starting with the
+        terminal node, and each node is listed once.
+
+        The PARENT/CHILD pairs are de-duplicated before the traversal. The
+        dataframe can contain the same pair many times (e.g. once per date),
+        and following every row separately would multiply the work for each
+        level of the tree.
+        """
+        edges = self._dataframe[["PARENT", "CHILD"]].dropna().drop_duplicates()
+        children_of: Dict[str, List[str]] = {}
+        for parent, child in zip(edges["PARENT"], edges["CHILD"]):
+            children_of.setdefault(parent, []).append(child)
+
+        branch_nodes: List[str] = []
+        visited = set()
+        stack = [terminal_node]
+        while stack:
+            node = stack.pop()
+            if node in visited:
+                # Already handled (also protects against cyclic input)
+                continue
+            visited.add(node)
+            branch_nodes.append(node)
+            # Reversed so that the first child is popped first
+            stack.extend(reversed(children_of.get(node, [])))
+        return branch_nodes
+
     @property
     def gruptrees_are_equal_over_reals(self) -> bool:
         """Returns true if gruptrees are exactly equal in all realizations."""
@@ -69,19 +144,27 @@ def webviz_store(self) -> Tuple[Callable, List[Dict]]:
         )
 
     @webvizstore
-    def read_ensemble_gruptree(self) -> pd.DataFrame:
+    def read_ensemble_gruptree(
+        self, df_files: Optional[pd.DataFrame] = None
+    ) -> pd.DataFrame:
         """Reads the gruptree files for an ensemble from the scratch disk. These
         files can be exported in the FMU workflow using the ECL2CSV
         forward model with subcommand gruptree.
 
-        If BRANPROP is found in the KEYWORD column, then GRUPTREE rows
-        are filtered out.
+        If tree_type == BRANPROP then GRUPTREE rows are filtered out
+        If tree_type == GRUPTREE then BRANPROP rows are filtered out
 
         If the trees are equal in every realization, only one realization is kept.
-        """
 
-        ens = scratch_ensemble(self._ens_name, self._ens_path, filter_file="OK")
-        df_files = ens.find_files(self._gruptree_file)
+        It is possible to pass a dataframe of file names (the only required columns
+        are REAL and FULLPATH). This is mostly intended for testing. If it is left
+        as None, the files are found automatically using the scratch_ensemble.
+        """
+        if df_files is None:
+            ens = scratch_ensemble(
+                self._ens_name, str(self._ens_path), filter_file="OK"
+            )
+            df_files = ens.find_files(self._gruptree_file)
 
         if df_files.empty:
             return pd.DataFrame()
@@ -93,12 +176,29 @@ def read_ensemble_gruptree(self) -> pd.DataFrame:
         gruptrees_are_equal = True
         for i, row in df_files.iterrows():
             df_real = pd.read_csv(row["FULLPATH"])
+            unique_keywords = df_real["KEYWORD"].unique()
 
-            if (
-                self._remove_gruptree_if_branprop
-                and "BRANPROP" in df_real["KEYWORD"].unique()
-            ):
-                df_real = df_real[df_real["KEYWORD"] != "GRUPTREE"]
+            if self._tree_type is None:
+                # If tree_type is None, then we filter out GRUPTREE if BRANPROP
+                # exists, otherwise we do nothing.
+                if TreeType.BRANPROP.value in unique_keywords:
+                    df_real = df_real[df_real["KEYWORD"] != TreeType.GRUPTREE.value]
+
+            else:
+                if self._tree_type.value not in unique_keywords:
+                    raise ValueError(
+                        f"Keyword {self._tree_type.value} not found in {row['FULLPATH']}"
+                    )
+                if (
+                    self._tree_type == TreeType.GRUPTREE
+                    and TreeType.BRANPROP.value in unique_keywords
+                ):
+                    # Filter out BRANPROP entries
+                    df_real = df_real[df_real["KEYWORD"] != TreeType.BRANPROP.value]
+
+                if self._tree_type == TreeType.BRANPROP:
+                    # Filter out GRUPTREE entries
+                    df_real = df_real[df_real["KEYWORD"] != TreeType.GRUPTREE.value]
 
             if (
                 i > 0