#359 Work towards read-only Results interface

nismod · Apr 23, 2019 · d068681 · d068681
1 parent 53477fb
commit d068681
Show file tree

Hide file tree

Showing 3 changed files with 212 additions and 1 deletion.
diff --git a/src/smif/data_layer/__init__.py b/src/smif/data_layer/__init__.py
@@ -6,8 +6,9 @@
 #         from smif.data_layer import DataHandle`
 from smif.data_layer.data_array import DataArray
 from smif.data_layer.data_handle import DataHandle
+from smif.data_layer.results import Results
 from smif.data_layer.store import Store
 
 # Define what should be imported as * ::
 #         from smif.data_layer import *
-__all__ = ['DataArray', 'DataHandle', 'Store']
+__all__ = ['DataArray', 'DataHandle', 'Results', 'Store']
diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py
@@ -0,0 +1,116 @@
+"""Results provides a common interface to access results from model runs.
+
+Raises
+------
+SmifDataNotFoundError
+    If data cannot be found in the store when trying to read from it
+SmifDataMismatchError
+    Data presented to read, write and update methods is in the
+    incorrect format or of wrong dimensions to that expected
+SmifDataReadError
+    When unable to read data e.g. unable to handle file type or connect
+    to database
+"""
+
+import os
+
+from smif.data_layer.file import (CSVDataStore, FileMetadataStore,
+                                  ParquetDataStore, YamlConfigStore)
+from smif.data_layer.store import Store
+
+
+class Results:
+    """Common interface to access results from model runs.
+
+    Parameters
+    ----------
+    interface : str, default 'local_csv'
+        The requested interface (local_csv or local_parquet currently supported)
+    model_base_dir : str, default '.'
+        The base directory of the model
+
+    Raises
+    ------
+    ValueError
+        If the interface is unsupported or model_base_dir is not an existing directory
+    """
+    def __init__(self, interface='local_csv', model_base_dir='.'):
+        # Check that the provided interface is supported
+        file_store = self._get_file_store(interface)
+        if file_store is None:
+            raise ValueError(
+                'Unsupported interface "{}". Supply local_csv or local_parquet'.format(
+                    interface))
+
+        # Check that the directory is valid
+        if not os.path.isdir(model_base_dir):
+            raise ValueError('Expected {} to be a valid directory'.format(model_base_dir))
+
+        self._store = Store(
+            config_store=YamlConfigStore(model_base_dir),
+            metadata_store=FileMetadataStore(model_base_dir),
+            data_store=file_store(model_base_dir),
+            model_base_folder=model_base_dir
+        )
+
+    @staticmethod
+    def _get_file_store(interface):
+        """Return the data store class for an interface name, or None if unsupported.
+
+        Parameters
+        ----------
+        interface : str
+            The requested interface
+
+        Returns
+        -------
+        CSVDataStore, ParquetDataStore or None
+        """
+        return {
+            'local_csv': CSVDataStore,
+            'local_parquet': ParquetDataStore,
+        }.get(interface)
+
+    def list_model_runs(self):
+        """Return the model run names known to the store.
+
+        Returns
+        -------
+        Sorted list of model run names
+        """
+        return sorted(x['name'] for x in self._store.read_model_runs())
+
+    def available_results(self, model_run_name):
+        """Return the results available for a given model run.
+
+        Parameters
+        ----------
+        model_run_name : str
+            The requested model run
+
+        Returns
+        -------
+        dict
+            Keys 'model_run', 'sos_model' and 'sector_models'; the latter nests as
+            model -> 'outputs' -> output -> decision -> sorted list of timesteps
+        """
+        available = self._store.available_results(model_run_name)
+
+        results = {
+            'model_run': model_run_name,
+            'sos_model': self._store.read_model_run(model_run_name)['sos_model'],
+            'sector_models': {},
+        }
+
+        # Group in one pass over `available`; sets de-duplicate timesteps until sorted below
+        sector_models = results['sector_models']
+        for timestep, decision, model_name, output in available:
+            outputs = sector_models.setdefault(model_name, {'outputs': {}})['outputs']
+            outputs.setdefault(output, {}).setdefault(decision, set()).add(timestep)
+
+        for model in sector_models.values():
+            model['outputs'] = {
+                output: {dec: sorted(ts) for dec, ts in decisions.items()}
+                for output, decisions in model['outputs'].items()}
+
+        return results
diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py
@@ -0,0 +1,94 @@
+"""Test the Results interface
+
+Results wraps a Store built from file-based config/metadata/data stores; these tests
+exercise its constructor validation, model run listing and available-results summary.
+"""
+
+import os
+import subprocess
+
+from pytest import fixture, raises
+from smif.data_layer import Results
+
+
+@fixture(scope="session")
+def tmp_sample_project_no_results(tmpdir_factory):
+    """Session-wide project folder created by ``smif setup``; no model run executed."""
+    test_folder = tmpdir_factory.mktemp("smif")
+    # check=True: fail here, not in a later assertion, if the CLI exits non-zero
+    subprocess.run(
+        ['smif', 'setup', '-d', str(test_folder), '-v'],
+        stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+    return str(test_folder)
+
+
+@fixture(scope="session")
+def tmp_sample_project_with_results(tmpdir_factory):
+    """Session-wide project folder after ``smif setup`` and ``smif run energy_central``."""
+    test_folder = tmpdir_factory.mktemp("smif")
+    # Set up, then run; check=True surfaces a failing CLI step here, not in a later assert
+    for cli_args in (['setup', '-d', str(test_folder), '-v'],
+                     ['run', '-d', str(test_folder), 'energy_central']):
+        subprocess.run(
+            ['smif'] + cli_args,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            check=True
+        )
+    return str(test_folder)
+
+
+class TestNoResults:
+    """Results over a sample project in which no model run has been executed."""
+    def test_exceptions(self, tmp_sample_project_no_results):
+        # Check that invalid interface is dealt with properly
+        with raises(ValueError) as e:
+            Results(interface='unexpected')
+        assert 'Unsupported interface' in str(e.value)
+
+        # Check that invalid directories are dealt with properly
+        fake_path = os.path.join(tmp_sample_project_no_results, 'not', 'valid')
+        with raises(ValueError) as e:
+            Results(model_base_dir=fake_path)
+        # Outside the `with` block: code after the raising call would never execute
+        assert 'to be a valid directory' in str(e.value)
+
+        # Check that valid options DO work
+        Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results)
+        Results(interface='local_parquet', model_base_dir=tmp_sample_project_no_results)
+
+    def test_list_model_runs(self, tmp_sample_project_no_results):
+        res = Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results)
+        model_runs = res.list_model_runs()
+        assert 'energy_central' in model_runs
+        assert 'energy_water_cp_cr' in model_runs
+        assert len(model_runs) == 2
+
+    def test_available_results(self, tmp_sample_project_no_results):
+        res = Results(interface='local_csv', model_base_dir=tmp_sample_project_no_results)
+        available = res.available_results('energy_central')
+
+        assert available['model_run'] == 'energy_central'
+        assert available['sos_model'] == 'energy'
+        assert available['sector_models'] == {}
+
+
+class TestSomeResults:
+    """Results over a sample project after the 'energy_central' model run has executed."""
+    def test_available_results(self, tmp_sample_project_with_results):
+        results = Results(interface='local_csv',
+                          model_base_dir=tmp_sample_project_with_results)
+        summary = results.available_results('energy_central')
+
+        assert summary['model_run'] == 'energy_central'
+        assert summary['sos_model'] == 'energy'
+
+        sector_models = summary['sector_models']
+        assert sorted(sector_models) == ['energy_demand']
+
+        # Both outputs are expected to report the same nested mapping
+        expected = {1: [2010], 2: [2010], 3: [2015], 4: [2020]}
+        model_outputs = sector_models['energy_demand']['outputs']
+        assert sorted(model_outputs) == ['cost', 'water_demand']
+        for output_name in ('cost', 'water_demand'):
+            assert model_outputs[output_name] == expected