From 7c35594495cebb4d57584e7c2f8b347751b9f778 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=98yvind=20Lind-Johansen?=
 <47847084+lindjoha@users.noreply.github.com>
Date: Wed, 26 Oct 2022 12:02:37 +0200
Subject: [PATCH] More flexibility in `GroupTree` (#1138)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Two new parameters in GroupTree for more flexibility, terminal_node and excl_well_startswith

* Implemented new function get_filtered_dataframe in gruptree_model

* New input excl_well_endswith

* Improved Exception message for injection nodes in BRANPROP trees

* Started on unit tests for the gruptree model

* Fixed CI workflow issues

* Improved tests with fixture creating the gruptree model

* Improved handling of injection for BRANPROP nodes

* Made terminal_node optional in get_filtered_dataframe

* New input parameter tree_type which is GRUPTREE by default, and some other improvements

* Relaxed the requirements on summary vectors

* New StrEnum DataType for oilrate, gasrate etc

* Eased the requirement on node summary vectors, they are now optional

* New type EdgeOrNode

* Small docstring update

* Implemented mock GruptreeModel class with some new tests

* Allowed tree_type to be defaulted in gruptree_model, in which case the tree is automatically selected

* Changelog entry

* Small update to comment

* Set back scipy version in setup.py and installed v 1.9.2 in CI workflow

* Updated CI workflow

Co-authored-by: Øyvind Lind-Johansen <olind@equinor.com>
---
 .github/workflows/subsurface.yml              |   7 +-
 CHANGELOG.md                                  |   3 +
 tests/data/gruptree.csv                       |   6 +
 .../model_tests/test_gruptree_model.py        |  92 ++++++
 .../unit_tests/plugin_tests/test_grouptree.py |   2 +-
 webviz_subsurface/_models/gruptree_model.py   | 128 ++++++++-
 .../plugins/_group_tree/_plugin.py            |  62 ++++-
 .../plugins/_group_tree/_types.py             |  16 ++
 .../_utils/_ensemble_group_tree_data.py       | 262 +++++++++++-------
 9 files changed, 459 insertions(+), 119 deletions(-)
 create mode 100644 tests/data/gruptree.csv
 create mode 100644 tests/unit_tests/model_tests/test_gruptree_model.py

diff --git a/.github/workflows/subsurface.yml b/.github/workflows/subsurface.yml
index 3ca857d7c..f9a849cd9 100644
--- a/.github/workflows/subsurface.yml
+++ b/.github/workflows/subsurface.yml
@@ -52,10 +52,13 @@ jobs:
         pip install "bleach<5"  # https://github.com/equinor/webviz-config/issues/586
         pip install "werkzeug<2.1"  # ...while waiting for https://github.com/plotly/dash/issues/1992
         pip install "selenium<4.3"  # breaking change in selenium==4.3
+        pip install "scipy<1.9.3"   # breaking change in scipy==1.9.3
+        pip install "pytest<7.2.0"
+        pip install "pytest-xdist<3.0"
         pip install .
-        
+
         # Testing against our latest release (including pre-releases)
-        pip install --pre --upgrade webviz-config webviz-core-components webviz-subsurface-components 
+        pip install --pre --upgrade webviz-config webviz-core-components webviz-subsurface-components
 
     - name: 📦 Install test dependencies
       run: |
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5da536bd..4644c2daf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED] - YYYY-MM-DD
 
+### Changed
+- [#1138](https://github.com/equinor/webviz-subsurface/pull/1138) - More flexibility in the input to the `GroupTree` plugin. New input parameters `terminal_node`, `tree_type`, `excl_well_startswith` and `excl_well_endswith`. Relaxed requirements on summary input.
+
 ## [0.2.15] - 2022-10-10
 
 ### Added
diff --git a/tests/data/gruptree.csv b/tests/data/gruptree.csv
new file mode 100644
index 000000000..a4317ddff
--- /dev/null
+++ b/tests/data/gruptree.csv
@@ -0,0 +1,6 @@
+DATE,CHILD,KEYWORD,PARENT,VFP_TABLE,ALQ,ALQ_SURFACE_DENSITY,TERMINAL_PRESSURE,AS_CHOKE,ADD_GAS_LIFT_GAS,CHOKE_GROUP,SOURCE_SINK_GROUP,NETWORK_VALUE_TYPE
+2023-01-01,FIELD,GRUPTREE,,,,,,,,,,
+2023-01-01,NODE,GRUPTREE,FIELD,,,,,,,,,
+2023-01-01,WELL,WELSPECS,NODE,,,,,,,,,
+2023-01-01,FIELD,BRANPROP,,,,,,,,,,
+2023-01-01,NODE,BRANPROP,FIELD,9999.0,0.0,NONE,25.0,NO,NO,,,PROD
diff --git a/tests/unit_tests/model_tests/test_gruptree_model.py b/tests/unit_tests/model_tests/test_gruptree_model.py
new file mode 100644
index 000000000..63a3190d0
--- /dev/null
+++ b/tests/unit_tests/model_tests/test_gruptree_model.py
@@ -0,0 +1,92 @@
+import datetime
+from pathlib import Path
+from typing import Optional
+
+import pandas as pd
+import pytest
+from pandas._testing import assert_frame_equal
+
+from webviz_subsurface._models.gruptree_model import GruptreeModel, TreeType
+
+CHECK_COLUMNS = ["DATE", "CHILD", "KEYWORD", "PARENT"]
+ENSEMBLE = "01_drogon_ahm"
+GRUPTREE_FILE = "share/results/tables/gruptree.csv"
+
+
+@pytest.fixture(name="gruptree_model")
+def fixture_model(testdata_folder) -> GruptreeModel:
+    ens_path = Path(testdata_folder) / ENSEMBLE / "realization-*" / "iter-0"
+    return GruptreeModel(
+        ens_name="iter-0",
+        ens_path=ens_path,
+        gruptree_file=GRUPTREE_FILE,
+        tree_type="GRUPTREE",
+    )
+
+
+# Mock class that loads local csv file
+class MockGruptreeModel(GruptreeModel):
+    # pylint: disable=super-init-not-called
+    def __init__(self, tree_type: Optional[TreeType] = None):
+        self._tree_type = tree_type
+        df_files = pd.DataFrame([{"REAL": 0, "FULLPATH": "tests/data/gruptree.csv"}])
+        self._dataframe = self.read_ensemble_gruptree(df_files=df_files)
+
+
+@pytest.mark.usefixtures("app")
+def test_gruptree_model_init(testdata_folder, gruptree_model: GruptreeModel):
+
+    # Check that there is only one REAL (means that the gruptree is
+    # the same for all realizations)
+    assert gruptree_model.dataframe["REAL"].nunique() == 1
+
+    # Load gruptree table from realization-0 and compare with
+    # the dataframe from the gruptree_model
+    r0_path = f"{testdata_folder}/{ENSEMBLE}/realization-0/iter-0/{GRUPTREE_FILE}"
+    exp_df = pd.read_csv(r0_path)
+    exp_df["DATE"] = pd.to_datetime(exp_df["DATE"])
+    exp_df = exp_df.where(pd.notnull(exp_df), None)
+
+    assert_frame_equal(gruptree_model.dataframe[CHECK_COLUMNS], exp_df[CHECK_COLUMNS])
+
+
+@pytest.mark.usefixtures("app")
+def test_get_filtered_dataframe(gruptree_model: GruptreeModel):
+
+    # Test the get_filtered_dataframe function with terminal node different than FIELD
+    filtered_df = gruptree_model.get_filtered_dataframe(terminal_node="OP")
+    filtered_df = filtered_df[
+        filtered_df["DATE"] == filtered_df["DATE"].max()
+    ].reset_index()
+    exp_filtered_df = pd.DataFrame(
+        columns=["DATE", "CHILD", "KEYWORD", "PARENT"],
+        data=[
+            [datetime.datetime(year=2018, month=11, day=17), "OP", "GRUPTREE", "FIELD"],
+            [datetime.datetime(year=2018, month=11, day=17), "A1", "WELSPECS", "OP"],
+            [datetime.datetime(year=2018, month=11, day=17), "A2", "WELSPECS", "OP"],
+            [datetime.datetime(year=2018, month=11, day=17), "A3", "WELSPECS", "OP"],
+            [datetime.datetime(year=2018, month=11, day=17), "A4", "WELSPECS", "OP"],
+        ],
+    )
+    assert_frame_equal(filtered_df[CHECK_COLUMNS], exp_filtered_df)
+
+    # Test excl_well_startswith and excl_well_endswith
+    assert set(
+        gruptree_model.get_filtered_dataframe(
+            excl_well_startswith=["R_"],
+            excl_well_endswith=["3", "5"],
+        )["CHILD"].unique()
+    ) == {"FIELD", "OP", "RFT", "WI", "A1", "A2", "A4", "A6"}
+
+
+def test_tree_type_filtering():
+
+    mock_model = MockGruptreeModel(tree_type=TreeType.GRUPTREE)
+    assert "BRANPROP" not in mock_model.dataframe["KEYWORD"].unique()
+
+    mock_model = MockGruptreeModel(tree_type=TreeType.BRANPROP)
+    assert "GRUPTREE" not in mock_model.dataframe["KEYWORD"].unique()
+
+    # If tree_type is defaulted then the BRANPROP tree is selected
+    mock_model = MockGruptreeModel()
+    assert "GRUPTREE" not in mock_model.dataframe["KEYWORD"].unique()
diff --git a/tests/unit_tests/plugin_tests/test_grouptree.py b/tests/unit_tests/plugin_tests/test_grouptree.py
index 0c7707ac0..7f965fa19 100644
--- a/tests/unit_tests/plugin_tests/test_grouptree.py
+++ b/tests/unit_tests/plugin_tests/test_grouptree.py
@@ -170,7 +170,7 @@ def test_add_nodetype(
         "CHILD"
     ].unique()
 
-    output = add_nodetype(gruptree_df, provider, wells)
+    output = add_nodetype(gruptree_df, provider, wells, "FIELD")
     pd.testing.assert_frame_equal(
         output[columns_to_check], expected_df[columns_to_check]
     )
diff --git a/webviz_subsurface/_models/gruptree_model.py b/webviz_subsurface/_models/gruptree_model.py
index c3f5f27fb..d1b849f64 100644
--- a/webviz_subsurface/_models/gruptree_model.py
+++ b/webviz_subsurface/_models/gruptree_model.py
@@ -1,12 +1,18 @@
 from pathlib import Path
-from typing import Callable, Dict, List, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 
 import pandas as pd
+from webviz_config.utils import StrEnum
 from webviz_config.webviz_store import webvizstore
 
 from webviz_subsurface._datainput.fmu_input import scratch_ensemble
 
 
+class TreeType(StrEnum):
+    GRUPTREE = "GRUPTREE"
+    BRANPROP = "BRANPROP"
+
+
 class GruptreeModel:
     """Facilitates loading of gruptree tables. Can be reused in all
     plugins that are using grouptree data and extended with additional
@@ -18,13 +24,14 @@ def __init__(
         ens_name: str,
         ens_path: Path,
         gruptree_file: str,
-        remove_gruptree_if_branprop: bool = True,
+        tree_type: Optional[str] = None,
     ):
         self._ens_name = ens_name
         self._ens_path = ens_path
         self._gruptree_file = gruptree_file
-        self._remove_gruptree_if_branprop = remove_gruptree_if_branprop
+        self._tree_type = TreeType(tree_type) if tree_type is not None else None
         self._dataframe = self.read_ensemble_gruptree()
+
         self._gruptrees_are_equal_over_reals = (
             self._dataframe["REAL"].nunique() == 1
             if not self._dataframe.empty
@@ -46,6 +53,74 @@ def dataframe(self) -> pd.DataFrame:
         """
         return self._dataframe
 
+    def get_filtered_dataframe(
+        self,
+        terminal_node: Optional[str] = None,
+        excl_well_startswith: Optional[List[str]] = None,
+        excl_well_endswith: Optional[List[str]] = None,
+    ) -> pd.DataFrame:
+        """This function returns a sub-set of the rows in the gruptree dataframe
+        filtered according to the input arguments:
+
+        - terminal_node: returns the terminal node and all nodes below it in the
+        tree (for all realizations and dates)
+        - excl_well_startswith: removes WELSPECS rows where CHILD starts with any
+        of the entries in the list.
+        - excl_well_endswith: removes WELSPECS rows where CHILD ends with any
+        of the entries in the list.
+
+        """
+        df = self._dataframe
+
+        if terminal_node is not None:
+
+            if terminal_node not in self._dataframe["CHILD"].unique():
+                raise ValueError(
+                    f"Terminal node '{terminal_node}' not found in 'CHILD' column "
+                    "of the gruptree data."
+                )
+
+            branch_nodes = self._get_branch_nodes(terminal_node)
+            df = self._dataframe[self._dataframe["CHILD"].isin(branch_nodes)]
+
+        def filter_wells(
+            dframe: pd.DataFrame, well_name_criteria: Callable
+        ) -> pd.DataFrame:
+            return dframe[
+                (dframe["KEYWORD"] != "WELSPECS")
+                | (
+                    (dframe["KEYWORD"] == "WELSPECS")
+                    & (~well_name_criteria(dframe["CHILD"]))
+                )
+            ]
+
+        if excl_well_startswith is not None:
+            # Filter out WELSPECS rows where CHILD starts with any element in excl_well_startswith
+            # Conversion to tuple done outside lambda due to mypy
+            excl_well_startswith_tuple = tuple(excl_well_startswith)
+            df = filter_wells(
+                df, lambda x: x.str.startswith(excl_well_startswith_tuple)
+            )
+
+        if excl_well_endswith is not None:
+            # Filter out WELSPECS rows where CHILD ends with any element in excl_well_endswith
+            # Conversion to tuple done outside lambda due to mypy
+            excl_well_endswith_tuple = tuple(excl_well_endswith)
+            df = filter_wells(df, lambda x: x.str.endswith(excl_well_endswith_tuple))
+
+        return df.copy()
+
+    def _get_branch_nodes(self, terminal_node: str) -> List[str]:
+        """Uses recursion to collect the given node and all nodes below it
+        in the tree.
+        """
+        branch_nodes = [terminal_node]
+
+        children = self._dataframe[self._dataframe["PARENT"] == terminal_node]
+        for _, childrow in children.iterrows():
+            branch_nodes.extend(self._get_branch_nodes(childrow["CHILD"]))
+        return branch_nodes
+
     @property
     def gruptrees_are_equal_over_reals(self) -> bool:
         """Returns true if gruptrees are exactly equal in all realizations."""
@@ -69,19 +144,27 @@ def webviz_store(self) -> Tuple[Callable, List[Dict]]:
         )
 
     @webvizstore
-    def read_ensemble_gruptree(self) -> pd.DataFrame:
+    def read_ensemble_gruptree(
+        self, df_files: Optional[pd.DataFrame] = None
+    ) -> pd.DataFrame:
         """Reads the gruptree files for an ensemble from the scratch disk. These
         files can be exported in the FMU workflow using the ECL2CSV
         forward model with subcommand gruptree.
 
-        If BRANPROP is found in the KEYWORD column, then GRUPTREE rows
-        are filtered out.
+        If tree_type == BRANPROP then GRUPTREE rows are filtered out
+        If tree_type == GRUPTREE then BRANPROP rows are filtered out
 
         If the trees are equal in every realization, only one realization is kept.
-        """
 
-        ens = scratch_ensemble(self._ens_name, self._ens_path, filter_file="OK")
-        df_files = ens.find_files(self._gruptree_file)
+        It is possible to pass a dataframe of file names (the only required columns
+        are REAL and FULLPATH). This is mostly intended for testing. If it is left
+        as None, the files are found automatically using the scratch_ensemble.
+        """
+        if df_files is None:
+            ens = scratch_ensemble(
+                self._ens_name, str(self._ens_path), filter_file="OK"
+            )
+            df_files = ens.find_files(self._gruptree_file)
 
         if df_files.empty:
             return pd.DataFrame()
@@ -93,12 +176,29 @@ def read_ensemble_gruptree(self) -> pd.DataFrame:
         gruptrees_are_equal = True
         for i, row in df_files.iterrows():
             df_real = pd.read_csv(row["FULLPATH"])
+            unique_keywords = df_real["KEYWORD"].unique()
 
-            if (
-                self._remove_gruptree_if_branprop
-                and "BRANPROP" in df_real["KEYWORD"].unique()
-            ):
-                df_real = df_real[df_real["KEYWORD"] != "GRUPTREE"]
+            if self._tree_type is None:
+                # If tree_type is None, then we filter out GRUPTREE if BRANPROP
+                # exists; otherwise we do nothing.
+                if TreeType.BRANPROP.value in unique_keywords:
+                    df_real = df_real[df_real["KEYWORD"] != TreeType.GRUPTREE.value]
+
+            else:
+                if self._tree_type.value not in unique_keywords:
+                    raise ValueError(
+                        f"Keyword {self._tree_type.value} not found in {row['FULLPATH']}"
+                    )
+                if (
+                    self._tree_type == TreeType.GRUPTREE
+                    and TreeType.BRANPROP.value in unique_keywords
+                ):
+                    # Filter out BRANPROP entries
+                    df_real = df_real[df_real["KEYWORD"] != TreeType.BRANPROP.value]
+
+                if self._tree_type == TreeType.BRANPROP:
+                    # Filter out GRUPTREE entries
+                    df_real = df_real[df_real["KEYWORD"] != TreeType.GRUPTREE.value]
 
             if (
                 i > 0
diff --git a/webviz_subsurface/plugins/_group_tree/_plugin.py b/webviz_subsurface/plugins/_group_tree/_plugin.py
index 4436a8fb6..c2560c134 100644
--- a/webviz_subsurface/plugins/_group_tree/_plugin.py
+++ b/webviz_subsurface/plugins/_group_tree/_plugin.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Callable, Dict, List, Tuple
+from typing import Callable, Dict, List, Optional, Tuple
 
 from webviz_config import WebvizPluginABC, WebvizSettings
 from webviz_config.utils import StrEnum
@@ -28,15 +28,19 @@ class GroupTree(WebvizPluginABC):
     **Summary data**
 
     This plugin needs the following summary vectors to be exported:
-    * FOPR, FWPR, FOPR, FWIR and FGIR
-    * GPR for all group nodes in the network
+    * WSTAT for all wells
+    * FWIR and FGIR if there are injector wells in the network
     * GOPR, GWPR and GGPR for all group nodes in the production network \
-    (GOPRNB etc for BRANPROP trees)
-    * GGIR and/or GWIR for all group nodes in the injection network
-    * WSTAT, WTHP, WBHP, WMCTL for all wells
+    except the terminal node (GOPRNB etc for BRANPROP trees)
+    * GGIR and/or GWIR for all group nodes in the injection network \
+    except the terminal node.
     * WOPR, WWPR, WGPR for all producers
     * WWIR and/or WGIR for all injectors
 
+    The following data will be displayed if available:
+    * GPR for all group nodes
+    * WTHP, WBHP, WMCTL for all wells
+
     **GRUPTREE input**
 
     `gruptree_file` is a path to a file stored per realization (e.g. in \
@@ -50,6 +54,27 @@ class GroupTree(WebvizPluginABC):
 
     This is the sampling interval of the summary data. It is `yearly` by default, but can be set
     to `monthly` if needed.
+
+    **terminal_node**
+
+    This parameter allows you to specify the terminal node used. It is `FIELD` by default.
+
+    **tree_type**
+
+    This parameter allows you to specify which tree type is visualized. It is `GRUPTREE` by
+    default, but can also be set to `BRANPROP`.
+
+    **excl_well_startswith**
+
+    This parameter allows you to remove wells that start with any of the strings in this list.
+    It is intended to be used to remove e.g. RFT wells that don't have any production or injection.
+    Be aware that if actual producers/injectors are removed, the rates in the tree might not be
+    consistent.
+
+    **excl_well_endswith**
+
+    Same as excl_well_startswith, but removes wells that end with any of the strings in this list.
+
     """
 
     class Ids(StrEnum):
@@ -62,17 +87,27 @@ def __init__(
         gruptree_file: str = "share/results/tables/gruptree.csv",
         rel_file_pattern: str = "share/results/unsmry/*.arrow",
         time_index: str = "yearly",
+        terminal_node: str = "FIELD",
+        tree_type: str = "GRUPTREE",
+        excl_well_startswith: Optional[List] = None,
+        excl_well_endswith: Optional[List] = None,
     ) -> None:
+        # pylint: disable=too-many-arguments
         super().__init__(stretch=True)
 
         self._ensembles = ensembles
         self._gruptree_file = gruptree_file
 
+        if excl_well_startswith is None:
+            excl_well_startswith = []
+        excl_well_startswith = [str(element) for element in excl_well_startswith]
+        if excl_well_endswith is None:
+            excl_well_endswith = []
+        excl_well_endswith = [str(element) for element in excl_well_endswith]
+
         if ensembles is None:
             raise ValueError('Incorrect argument, must provide "ensembles"')
 
-        sampling = Frequency(time_index)
-
         self._ensemble_paths: Dict[str, Path] = {
             ensemble_name: webviz_settings.shared_settings["scratch_ensembles"][
                 ensemble_name
@@ -92,7 +127,16 @@ def __init__(
                 )
             )
             self._group_tree_data[ens_name] = EnsembleGroupTreeData(
-                provider, GruptreeModel(ens_name, ens_path, gruptree_file)
+                provider=provider,
+                gruptree_model=GruptreeModel(
+                    ens_name=ens_name,
+                    ens_path=ens_path,
+                    gruptree_file=gruptree_file,
+                    tree_type=tree_type,
+                ),
+                terminal_node=terminal_node,
+                excl_well_startswith=excl_well_startswith,
+                excl_well_endswith=excl_well_endswith,
             )
 
         self.add_view(
diff --git a/webviz_subsurface/plugins/_group_tree/_types.py b/webviz_subsurface/plugins/_group_tree/_types.py
index 05ebec1b8..8f7f81b5e 100644
--- a/webviz_subsurface/plugins/_group_tree/_types.py
+++ b/webviz_subsurface/plugins/_group_tree/_types.py
@@ -19,3 +19,19 @@ class NodeType(StrEnum):
     PROD = "prod"
     INJ = "inj"
     OTHER = "other"
+
+
+class DataType(StrEnum):
+    OILRATE = "oilrate"
+    GASRATE = "gasrate"
+    WATERRATE = "waterrate"
+    WATERINJRATE = "waterinjrate"
+    GASINJRATE = "gasinjrate"
+    PRESSURE = "pressure"
+    BHP = "bhp"
+    WMCTL = "wmctl"
+
+
+class EdgeOrNode(StrEnum):
+    EDGE = "edge"
+    NODE = "node"
diff --git a/webviz_subsurface/plugins/_group_tree/_utils/_ensemble_group_tree_data.py b/webviz_subsurface/plugins/_group_tree/_utils/_ensemble_group_tree_data.py
index 08e458551..780447769 100644
--- a/webviz_subsurface/plugins/_group_tree/_utils/_ensemble_group_tree_data.py
+++ b/webviz_subsurface/plugins/_group_tree/_utils/_ensemble_group_tree_data.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Callable, Dict, List, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import numpy as np
 import pandas as pd
@@ -8,7 +8,7 @@
 from webviz_subsurface._models import GruptreeModel
 from webviz_subsurface._providers import EnsembleSummaryProvider
 
-from .._types import NodeType, StatOptions, TreeModeOptions
+from .._types import DataType, EdgeOrNode, NodeType, StatOptions, TreeModeOptions
 
 
 class EnsembleGroupTreeData:
@@ -17,29 +17,45 @@ class EnsembleGroupTreeData:
     """
 
     def __init__(
-        self, provider: EnsembleSummaryProvider, gruptree_model: GruptreeModel
+        self,
+        provider: EnsembleSummaryProvider,
+        gruptree_model: GruptreeModel,
+        terminal_node: str,
+        excl_well_startswith: Optional[List[str]] = None,
+        excl_well_endswith: Optional[List[str]] = None,
     ):
 
         self._provider = provider
         self._gruptree_model = gruptree_model
-        self._gruptree = self._gruptree_model.dataframe
+        self._terminal_node = terminal_node
+        self._gruptree = self._gruptree_model.get_filtered_dataframe(
+            terminal_node=self._terminal_node,
+            excl_well_startswith=excl_well_startswith,
+            excl_well_endswith=excl_well_endswith,
+        )
+
         self._wells: List[str] = self._gruptree[
             self._gruptree["KEYWORD"] == "WELSPECS"
         ]["CHILD"].unique()
 
-        # Check that all field rate and WSTAT summary vectors exist
-        self._check_that_sumvecs_exists(
-            ["FOPR", "FGPR", "FWPR", "FWIR", "FGIR"]
-            + [f"WSTAT:{well}" for well in self._wells]
-        )
-
-        # Check if the ensemble has waterinj and/or gasinj
-        smry = self._provider.get_vectors_df(["FWIR", "FGIR"], None)
-        self._has_waterinj = smry["FWIR"].sum() > 0
-        self._has_gasinj = smry["FGIR"].sum() > 0
+        # Check that all WSTAT summary vectors exist
+        # They are used to determine which summary vectors are needed next.
+        self._check_that_sumvecs_exists([f"WSTAT:{well}" for well in self._wells])
 
         # Add nodetypes IS_PROD, IS_INJ and IS_OTHER to gruptree
-        self._gruptree = add_nodetype(self._gruptree, self._provider, self._wells)
+        self._gruptree = add_nodetype(
+            self._gruptree, self._provider, self._wells, self._terminal_node
+        )
+
+        self._has_waterinj = False
+        self._has_gasinj = False
+        if True in self._gruptree["IS_INJ"].unique():
+            # If there is injection in the tree we need to determine
+            # which kind of injection. For that we need FWIR and FGIR
+            self._check_that_sumvecs_exists(["FWIR", "FGIR"])
+            smry = self._provider.get_vectors_df(["FWIR", "FGIR"], None)
+            self._has_waterinj = smry["FWIR"].sum() > 0
+            self._has_gasinj = smry["FGIR"].sum() > 0
 
         # Add edge label
         self._gruptree["EDGE_LABEL"] = self._gruptree.apply(get_edge_label, axis=1)
@@ -47,8 +63,10 @@ def __init__(
         # Get summary data with metadata (nodename, datatype, edge_or_node)
         self._sumvecs: pd.DataFrame = self._get_sumvecs_with_metadata()
 
-        # Check that all summary vectors exist
-        self._check_that_sumvecs_exists(list(self._sumvecs["SUMVEC"]))
+        # Check that all edge summary vectors exist
+        self._check_that_sumvecs_exists(
+            list(self._sumvecs[self._sumvecs["EDGE_NODE"] == EdgeOrNode.EDGE]["SUMVEC"])
+        )
 
     @property
     def webviz_store(self) -> Tuple[Callable, List[Dict]]:
@@ -75,7 +93,12 @@ def create_grouptree_dataset(
         """  # noqa
 
         # Filter smry
-        smry = self._provider.get_vectors_df(list(self._sumvecs["SUMVEC"]), None)
+        vectors = [
+            sumvec
+            for sumvec in self._sumvecs["SUMVEC"]
+            if sumvec in self._provider.vector_names()
+        ]
+        smry = self._provider.get_vectors_df(vectors, None)
 
         if tree_mode is TreeModeOptions.STATISTICS:
             if stat_option is StatOptions.MEAN:
@@ -95,7 +118,10 @@ def create_grouptree_dataset(
             smry = smry[smry["REAL"] == real]
 
         gruptree_filtered = self._gruptree
-        if tree_mode == "single_real" and not self.tree_is_equivalent_in_all_real():
+        if (
+            tree_mode == TreeModeOptions.SINGLE_REAL
+            and not self.tree_is_equivalent_in_all_real()
+        ):
             # Trees are not equal. Filter on realization
             gruptree_filtered = gruptree_filtered[gruptree_filtered["REAL"] == real]
 
@@ -106,11 +132,11 @@ def create_grouptree_dataset(
         gruptree_filtered = pd.concat(dfs).drop_duplicates()
 
         return (
-            create_dataset(smry, gruptree_filtered, self._sumvecs),
+            create_dataset(smry, gruptree_filtered, self._sumvecs, self._terminal_node),
             self.get_edge_options(node_types),
             [
-                {"name": option, "label": get_label(option)}
-                for option in ["pressure", "bhp", "wmctl"]
+                {"name": datatype, "label": get_label(datatype)}
+                for datatype in [DataType.PRESSURE, DataType.BHP, DataType.WMCTL]
             ],
         )
 
@@ -135,6 +161,8 @@ def _get_sumvecs_with_metadata(
         * nodename: name in eclipse network
         * datatype: oilrate, gasrate, pressure etc
         * edge_node: whether the datatype is edge (f.ex rates) or node (f.ex pressure)
+
+        Rates are not required for the terminal node since they will not be used.
         """
         records = []
 
@@ -143,15 +171,23 @@ def _get_sumvecs_with_metadata(
             nodename = noderow["CHILD"]
             keyword = noderow["KEYWORD"]
 
-            datatypes = ["pressure"]
-            if noderow["IS_PROD"]:
-                datatypes += ["oilrate", "gasrate", "waterrate"]
-            if noderow["IS_INJ"] and self._has_waterinj:
-                datatypes.append("waterinjrate")
-            if noderow["IS_INJ"] and self._has_gasinj:
-                datatypes.append("gasinjrate")
+            datatypes = [DataType.PRESSURE]
+            if noderow["IS_PROD"] and nodename != self._terminal_node:
+                datatypes += [DataType.OILRATE, DataType.GASRATE, DataType.WATERRATE]
+            if (
+                noderow["IS_INJ"]
+                and self._has_waterinj
+                and nodename != self._terminal_node
+            ):
+                datatypes.append(DataType.WATERINJRATE)
+            if (
+                noderow["IS_INJ"]
+                and self._has_gasinj
+                and nodename != self._terminal_node
+            ):
+                datatypes.append(DataType.GASINJRATE)
             if keyword == "WELSPECS":
-                datatypes += ["bhp", "wmctl"]
+                datatypes += [DataType.BHP, DataType.WMCTL]
 
             for datatype in datatypes:
                 records.append(
@@ -186,21 +222,28 @@ def get_edge_options(self, node_types: List[NodeType]) -> List[Dict[str, str]]:
         """Returns a list with edge node options for the dropdown
         menu in the GroupTree component. The output list has the format:
         [
-            {"name": "oilrate", "label": "Oil Rate"},
-            {"name": "gasrate", "label": "Gas Rate"},
+            {"name": DataType.OILRATE, "label": "Oil Rate"},
+            {"name": DataType.GASRATE, "label": "Gas Rate"},
         ]
         """
         options = []
         if NodeType.PROD in node_types:
-            for rate in ["oilrate", "gasrate", "waterrate"]:
+            for rate in [DataType.OILRATE, DataType.GASRATE, DataType.WATERRATE]:
                 options.append({"name": rate, "label": get_label(rate)})
         if NodeType.INJ in node_types and self._has_waterinj:
-            options.append({"name": "waterinjrate", "label": get_label("waterinjrate")})
+            options.append(
+                {
+                    "name": DataType.WATERINJRATE,
+                    "label": get_label(DataType.WATERINJRATE),
+                }
+            )
         if NodeType.INJ in node_types and self._has_gasinj:
-            options.append({"name": "gasinjrate", "label": get_label("gasinjrate")})
+            options.append(
+                {"name": DataType.GASINJRATE, "label": get_label(DataType.GASINJRATE)}
+            )
         if options:
             return options
-        return [{"name": "oilrate", "label": get_label("oilrate")}]
+        return [{"name": DataType.OILRATE, "label": get_label(DataType.OILRATE)}]
 
 
 def get_edge_label(row: pd.Series) -> str:
@@ -215,25 +258,25 @@ def get_edge_label(row: pd.Series) -> str:
     return f"VFP {vfp_nb}"
 
 
-def get_label(datatype: str) -> str:
+def get_label(datatype: DataType) -> str:
     """Returns a more readable label for the summary datatypes"""
     labels = {
-        "oilrate": "Oil Rate",
-        "gasrate": "Gas Rate",
-        "waterrate": "Water Rate",
-        "waterinjrate": "Water Inj Rate",
-        "gasinjrate": "Gas Inj Rate",
-        "pressure": "Pressure",
-        "bhp": "BHP",
-        "wmctl": "WMCTL",
+        DataType.OILRATE: "Oil Rate",
+        DataType.GASRATE: "Gas Rate",
+        DataType.WATERRATE: "Water Rate",
+        DataType.WATERINJRATE: "Water Inj Rate",
+        DataType.GASINJRATE: "Gas Inj Rate",
+        DataType.PRESSURE: "Pressure",
+        DataType.BHP: "BHP",
+        DataType.WMCTL: "WMCTL",
     }
     if datatype in labels:
         return labels[datatype]
-    raise ValueError(f"Label for dataype {datatype} not implemented.")
+    raise ValueError(f"Label for datatype {datatype.value} not implemented.")
 
 
 def get_sumvec(
-    datatype: str,
+    datatype: DataType,
     nodename: str,
     keyword: str,
 ) -> str:
@@ -244,36 +287,40 @@ def get_sumvec(
     """
     datatype_map = {
         "FIELD": {
-            "oilrate": "FOPR",
-            "gasrate": "FGPR",
-            "waterrate": "FWPR",
-            "waterinjrate": "FWIR",
-            "gasinjrate": "FGIR",
-            "pressure": "GPR",
+            DataType.OILRATE: "FOPR",
+            DataType.GASRATE: "FGPR",
+            DataType.WATERRATE: "FWPR",
+            DataType.WATERINJRATE: "FWIR",
+            DataType.GASINJRATE: "FGIR",
+            DataType.PRESSURE: "GPR",
         },
         "GRUPTREE": {
-            "oilrate": "GOPR",
-            "gasrate": "GGPR",
-            "waterrate": "GWPR",
-            "waterinjrate": "GWIR",
-            "gasinjrate": "GGIR",
-            "pressure": "GPR",
+            DataType.OILRATE: "GOPR",
+            DataType.GASRATE: "GGPR",
+            DataType.WATERRATE: "GWPR",
+            DataType.WATERINJRATE: "GWIR",
+            DataType.GASINJRATE: "GGIR",
+            DataType.PRESSURE: "GPR",
         },
+        # BRANPROP cannot be used for injection, but the nodes
+        # might also be GNETINJE nodes and could therefore have injection.
         "BRANPROP": {
-            "oilrate": "GOPRNB",
-            "gasrate": "GGPRNB",
-            "waterrate": "GWPRNB",
-            "pressure": "GPR",
+            DataType.OILRATE: "GOPRNB",
+            DataType.GASRATE: "GGPRNB",
+            DataType.WATERRATE: "GWPRNB",
+            DataType.PRESSURE: "GPR",
+            DataType.WATERINJRATE: "GWIR",
+            DataType.GASINJRATE: "GGIR",
         },
         "WELSPECS": {
-            "oilrate": "WOPR",
-            "gasrate": "WGPR",
-            "waterrate": "WWPR",
-            "waterinjrate": "WWIR",
-            "gasinjrate": "WGIR",
-            "pressure": "WTHP",
-            "bhp": "WBHP",
-            "wmctl": "WMCTL",
+            DataType.OILRATE: "WOPR",
+            DataType.GASRATE: "WGPR",
+            DataType.WATERRATE: "WWPR",
+            DataType.WATERINJRATE: "WWIR",
+            DataType.GASINJRATE: "WGIR",
+            DataType.PRESSURE: "WTHP",
+            DataType.BHP: "WBHP",
+            DataType.WMCTL: "WMCTL",
         },
     }
     if nodename == "FIELD":
@@ -281,21 +328,37 @@ def get_sumvec(
         if datatype == "pressure":
             return f"{datatype_ecl}:{nodename}"
         return datatype_ecl
-    datatype_ecl = datatype_map[keyword][datatype]
+    try:
+        datatype_ecl = datatype_map[keyword][datatype]
+    except KeyError as exc:
+        error = (
+            f"Summary vector not found for eclipse keyword: {keyword}, "
+            f"data type: {datatype.value} and node name: {nodename}. "
+        )
+        raise KeyError(error) from exc
     return f"{datatype_ecl}:{nodename}"
 
 
-def get_edge_node(datatype: str) -> str:
+def get_edge_node(datatype: DataType) -> EdgeOrNode:
     """Returns if a given datatype is edge (typically rates) or node (f.ex pressures)"""
-    if datatype in ["oilrate", "gasrate", "waterrate", "waterinjrate", "gasinjrate"]:
-        return "edge"
-    if datatype in ["pressure", "bhp", "wmctl"]:
-        return "node"
-    raise ValueError(f"Data type {datatype} not implemented.")
+    if datatype in [
+        DataType.OILRATE,
+        DataType.GASRATE,
+        DataType.WATERRATE,
+        DataType.WATERINJRATE,
+        DataType.GASINJRATE,
+    ]:
+        return EdgeOrNode.EDGE
+    if datatype in [DataType.PRESSURE, DataType.BHP, DataType.WMCTL]:
+        return EdgeOrNode.NODE
+    raise ValueError(f"Data type {datatype.value} not implemented.")
 
 
 def create_dataset(
-    smry: pd.DataFrame, gruptree: pd.DataFrame, sumvecs: pd.DataFrame
+    smry: pd.DataFrame,
+    gruptree: pd.DataFrame,
+    sumvecs: pd.DataFrame,
+    terminal_node: str,
 ) -> List[dict]:
     """The function puts together the GroupTree component input dataset.
 
@@ -319,7 +382,7 @@ def create_dataset(
                 {
                     "dates": [date.strftime("%Y-%m-%d") for date in dates],
                     "tree": extract_tree(
-                        gruptree_date, "FIELD", smry_in_datespan, dates, sumvecs
+                        gruptree_date, terminal_node, smry_in_datespan, dates, sumvecs
                     ),
                 }
             )
@@ -338,7 +401,9 @@ def extract_tree(
     sumvecs: pd.DataFrame,
 ) -> dict:
     """Extract the tree part of the GroupTree component dataset. This functions
-    works recursively and is initially called with the top node of the tree: FIELD."""
+    works recursively and is initially called with the terminal node of the tree
+    (usually FIELD)
+    """
     # pylint: disable=too-many-locals
     node_sumvecs = sumvecs[sumvecs["NODENAME"] == nodename]
     nodedict = get_nodedict(gruptree, nodename)
@@ -349,8 +414,12 @@ def extract_tree(
         "edge_label": nodedict["EDGE_LABEL"],
     }
 
-    edges = node_sumvecs[node_sumvecs["EDGE_NODE"] == "edge"].to_dict("records")
-    nodes = node_sumvecs[node_sumvecs["EDGE_NODE"] == "node"].to_dict("records")
+    edges = node_sumvecs[node_sumvecs["EDGE_NODE"] == EdgeOrNode.EDGE].to_dict(
+        "records"
+    )
+    nodes = node_sumvecs[node_sumvecs["EDGE_NODE"] == EdgeOrNode.NODE].to_dict(
+        "records"
+    )
 
     edge_data: Dict[str, List[float]] = {item["DATATYPE"]: [] for item in edges}
     node_data: Dict[str, List[float]] = {item["DATATYPE"]: [] for item in nodes}
@@ -362,9 +431,12 @@ def extract_tree(
                 round(smry_at_date[item["SUMVEC"]].values[0], 2)
             )
         for item in nodes:
-            node_data[item["DATATYPE"]].append(
-                round(smry_at_date[item["SUMVEC"]].values[0], 2)
-            )
+            try:
+                node_data[item["DATATYPE"]].append(
+                    round(smry_at_date[item["SUMVEC"]].values[0], 2)
+                )
+            except KeyError:
+                node_data[item["DATATYPE"]].append(np.nan)
 
     result["edge_data"] = edge_data
     result["node_data"] = node_data
@@ -391,7 +463,10 @@ def get_nodedict(gruptree: pd.DataFrame, nodename: str) -> Dict[str, Any]:
 
 
 def add_nodetype(
-    gruptree: pd.DataFrame, provider: EnsembleSummaryProvider, all_wells: List[str]
+    gruptree: pd.DataFrame,
+    provider: EnsembleSummaryProvider,
+    all_wells: List[str],
+    terminal_node: str,
 ) -> pd.DataFrame:
     """Adds nodetype IS_PROD, IS_INJ and IS_OTHER."""
 
@@ -425,10 +500,11 @@ def is_leafnode(node: pd.Series) -> bool:
         is_inj_map[node["CHILD"]] = any(leafs_are_inj)
         is_other_map[node["CHILD"]] = any(leafs_are_other)
 
-    # FIELD node must not be filtered out, so it is set True for all categories
-    is_prod_map["FIELD"] = True
-    is_inj_map["FIELD"] = True
-    is_other_map["FIELD"] = True
+    # The terminal node must not be filtered out,
+    # so it is set True for all categories
+    is_prod_map[terminal_node] = True
+    is_inj_map[terminal_node] = True
+    is_other_map[terminal_node] = True
 
     # Tag all nodes as IS_PROD, IS_INJ and IS_OTHER
     gruptree["IS_PROD"] = gruptree["CHILD"].map(is_prod_map)
@@ -488,12 +564,12 @@ def create_leafnodetype_maps(
             # The leaf node is a group
             prod_sumvecs = [
                 get_sumvec(datatype, nodename, nodekeyword)
-                for datatype in ["oilrate", "gasrate", "waterrate"]
+                for datatype in [DataType.OILRATE, DataType.GASRATE, DataType.WATERRATE]
             ]
             inj_sumvecs = (
                 [
                     get_sumvec(datatype, nodename, nodekeyword)
-                    for datatype in ["waterinjrate", "gasinjrate"]
+                    for datatype in [DataType.WATERINJRATE, DataType.GASINJRATE]
                 ]
                 if nodekeyword != "BRANPROP"
                 else []