diff --git a/src/smif/data_layer/__init__.py b/src/smif/data_layer/__init__.py index ac8748d17..42e949135 100644 --- a/src/smif/data_layer/__init__.py +++ b/src/smif/data_layer/__init__.py @@ -6,8 +6,9 @@ # from smif.data_layer import DataHandle` from smif.data_layer.data_array import DataArray from smif.data_layer.data_handle import DataHandle +from smif.data_layer.results import Results from smif.data_layer.store import Store # Define what should be imported as * :: # from smif.data_layer import * -__all__ = ['DataArray', 'DataHandle', 'Store'] +__all__ = ['DataArray', 'DataHandle', 'Results', 'Store'] diff --git a/src/smif/data_layer/file/file_data_store.py b/src/smif/data_layer/file/file_data_store.py index 6fda502d8..ba0069127 100644 --- a/src/smif/data_layer/file/file_data_store.py +++ b/src/smif/data_layer/file/file_data_store.py @@ -42,7 +42,7 @@ def __init__(self, base_folder): dirname = os.path.join(self.data_folder, folder) # ensure each directory exists if not os.path.exists(dirname): - msg = "Expected data folder at '{}' but it does does not exist" + msg = "Expected data folder at '{}' but it does not exist" abs_path = os.path.abspath(dirname) raise SmifDataNotFoundError(msg.format(abs_path)) self.data_folders[folder] = dirname diff --git a/src/smif/data_layer/results.py b/src/smif/data_layer/results.py new file mode 100644 index 000000000..d363060a6 --- /dev/null +++ b/src/smif/data_layer/results.py @@ -0,0 +1,223 @@ +"""Results provides a common interface to access results from model runs. +""" + +from typing import Union + +import pandas as pd +from smif.data_layer.store import Store + + +class Results: + """Common interface to access results from model runs. 
+ + Parameters + ---------- + store: Store or dict + pre-created Store object or dictionary of the form {'interface': , + 'dir': } where is either 'local_csv' or 'local_parquet', and is + the model base directory + """ + def __init__(self, store: Union[Store, dict]): + + if type(store) is dict: + self._store = Store.from_dict(store) + else: + self._store = store # type: Store + + # keep tabs on the units of any read outputs + self._output_units = dict() # type: dict + + def list_model_runs(self): + """Return a list of model run names. + + Returns + ------- + List of model run names + """ + return sorted([x['name'] for x in self._store.read_model_runs()]) + + def available_results(self, model_run_name): + """Return the results available for a given model run. + + Parameters + ---------- + model_run_name: str the requested model run + + Returns + ------- + A nested dictionary data structure of the results available for the given model run + """ + + available = self._store.available_results(model_run_name) + + results = { + 'model_run': model_run_name, + 'sos_model': self._store.read_model_run(model_run_name)['sos_model'], + 'sector_models': dict(), + } + + model_names = {sec for _t, _d, sec, _out in available} + for model_name in model_names: + results['sector_models'][model_name] = { + 'outputs': dict(), + } + + outputs = {out for _t, _d, sec, out in available if sec == model_name} + + for output in outputs: + results['sector_models'][model_name]['outputs'][output] = dict() + + decs = {d for _t, d, sec, out in available if + sec == model_name and out == output} + + for dec in decs: + ts = sorted({t for t, d, sec, out in available if + d == dec and sec == model_name and out == output}) + results['sector_models'][model_name]['outputs'][output][dec] = ts + + return results + + def read(self, + model_run_names: list, + model_names: list, + output_names: list, + timesteps: list = None, + decisions: list = None, + time_decision_tuples: list = None, + ): + """Return 
results from the store as a formatted pandas data frame. There are a number + of ways of requesting specific timesteps/decisions. You can specify either: + + a list of (timestep, decision) tuples + in which case data for all of those tuples matching the available results will + be returned + or: + a list of timesteps + in which case data for all of those timesteps (and any decision iterations) + matching the available results will be returned + or: + a list of decision iterations + in which case data for all of those decision iterations (and any timesteps) + matching the available results will be returned + or: + a list of timesteps and a list of decision iterations + in which case data for the Cartesian product of those timesteps and those + decision iterations matching the available results will be returned + or: + nothing + in which case all available results will be returned + + Parameters + ---------- + model_run_names: list + the requested model run names + model_names: list + the requested sector model names (exactly one required) + output_names: list + the requested output names (output specs must all match) + timesteps: list + the requested timesteps + decisions: list + the requested decision iterations + time_decision_tuples: list + a list of requested (timestep, decision) tuples + + Raises + ------ + SmifDataNotFoundError + If data cannot be found in the store when trying to read from the store + SmifDataMismatchError + Data presented to read, write and update methods is in the + incorrect format or of wrong dimensions to that expected + SmifDataReadError + When unable to read data e.g. 
unable to handle file type or connect + to database + + Returns + ------- + pandas.DataFrame + """ + + self.validate_names(model_run_names, model_names, output_names) + + results_dict = self._store.get_results( + model_run_names, + model_names[0], + output_names, + timesteps, + decisions, + time_decision_tuples + ) + + # Keep tabs on the units for each output + for model_run_name in model_run_names: + for output_name in output_names: + res = results_dict[model_run_name][output_name] + self._output_units[res.name] = res.unit + + # For each output, concatenate all requested model runs into a single data frame + formatted_frames = [] + for output_name in output_names: + # Get each DataArray as a pandas data frame and concatenate, resetting the index to + # give back a flat data array + list_of_df = [results_dict[x][output_name].as_df() for x in model_run_names] + names_of_df = [x for x in results_dict.keys()] + + formatted_frames.append( + pd.concat(list_of_df, keys=names_of_df, names=['model_run']).reset_index()) + + # Append the other output columns to the first data frame + formatted_frame = formatted_frames.pop(0) + output_names.pop(0) + + for other_frame, output_name in zip(formatted_frames, output_names): + assert (formatted_frame['model_run'] == other_frame['model_run']).all() + assert (formatted_frame['timestep_decision'] == other_frame[ + 'timestep_decision']).all() + formatted_frame[output_name] = other_frame[output_name] + + # Unpack the timestep_decision tuples into individual columns and drop the combined + formatted_frame[['timestep', 'decision']] = pd.DataFrame( + formatted_frame['timestep_decision'].tolist(), index=formatted_frame.index) + + formatted_frame = formatted_frame.drop(columns=['timestep_decision']) + + # Now reorder the columns. 
Want model_run then timestep then decision + cols = formatted_frame.columns.tolist() + + assert (cols[0] == 'model_run') + cols.insert(1, cols.pop(cols.index('timestep'))) + cols.insert(2, cols.pop(cols.index('decision'))) + assert (cols[0:3] == ['model_run', 'timestep', 'decision']) + + return formatted_frame[cols] + + def get_units(self, output_name: str): + """ Return the units of a given output. + + Parameters + ---------- + output_name: str + + Returns + ------- + str + """ + return self._output_units[output_name] + + def validate_names(self, model_run_names, sec_model_names, output_names): + + if len(sec_model_names) != 1: + raise NotImplementedError( + 'Results.read() currently requires exactly one sector model' + ) + + if len(model_run_names) < 1: + raise ValueError( + 'Results.read() requires at least one sector model name' + ) + + if len(output_names) < 1: + raise ValueError( + 'Results.read() requires at least one output name' + ) diff --git a/src/smif/data_layer/store.py b/src/smif/data_layer/store.py index 9aa125e99..daee7c629 100644 --- a/src/smif/data_layer/store.py +++ b/src/smif/data_layer/store.py @@ -15,8 +15,10 @@ to database """ import itertools +import logging +import os +from collections import OrderedDict from copy import deepcopy -from logging import getLogger from operator import itemgetter from typing import Dict, List, Optional @@ -24,7 +26,8 @@ from smif.data_layer import DataArray from smif.data_layer.abstract_data_store import DataStore from smif.data_layer.abstract_metadata_store import MetadataStore -from smif.data_layer.file import CSVDataStore, ParquetDataStore +from smif.data_layer.file import (CSVDataStore, FileMetadataStore, + ParquetDataStore, YamlConfigStore) from smif.data_layer.validate import (validate_sos_model_config, validate_sos_model_format) from smif.exception import SmifDataNotFoundError @@ -32,7 +35,8 @@ class Store(): - """Common interface to data store, composed of config, metadata and data store implementations. 
+ """Common interface to data store, composed of config, metadata and data store + implementations. Parameters ---------- @@ -42,13 +46,50 @@ class Store(): """ def __init__(self, config_store, metadata_store: MetadataStore, data_store: DataStore, model_base_folder="."): - self.logger = getLogger(__name__) + self.logger = logging.getLogger(__name__) self.config_store = config_store self.metadata_store = metadata_store self.data_store = data_store # base folder for any relative paths to models self.model_base_folder = str(model_base_folder) + @classmethod + def from_dict(cls, config): + """Create Store from configuration dict + """ + + try: + interface = config['interface'] + except KeyError: + logging.warning('No interface provided for Results(). Assuming local_csv') + interface = 'local_csv' + + try: + directory = config['dir'] + except KeyError: + logging.warning("No directory provided for Results(). Assuming '.'") + directory = '.' + + # Check that the directory is valid + if not os.path.isdir(directory): + raise ValueError('Expected {} to be a valid directory'.format(directory)) + + if interface == 'local_csv': + data_store = CSVDataStore(directory) + elif interface == 'local_parquet': + data_store = ParquetDataStore(directory) + else: + raise ValueError( + 'Unsupported interface "{}". 
Supply local_csv or local_parquet'.format( + interface)) + + return cls( + config_store=YamlConfigStore(directory), + metadata_store=FileMetadataStore(directory), + data_store=data_store, + model_base_folder=directory + ) + # # CONFIG # @@ -804,7 +845,7 @@ def read_results(self, Parameters ---------- - model_run_id : str + model_run_name : str model_name : str output_spec : smif.metadata.Spec timestep : int, default=None @@ -896,8 +937,8 @@ def canonical_available_results(self, model_run_name): canonical_list = [] - for t, d, sec_model_name, output_name in available_results: - canonical_list.append((t, 0, sec_model_name, output_name)) + for t, d, model_name, output_name in available_results: + canonical_list.append((t, 0, model_name, output_name)) # Return as a set to remove duplicates return set(canonical_list) @@ -923,7 +964,7 @@ def canonical_expected_results(self, model_run_name): """ # Model results are returned as a tuple - # (timestep, decision_it, sec_model_name, output_name) + # (timestep, decision_it, model_name, output_name) # so we first build the full list of expected results tuples. 
expected_results = [] @@ -937,13 +978,13 @@ def canonical_expected_results(self, model_run_name): sos_config = self.read_sos_model(sos_model_name) # For each sector model, get the outputs and create the tuples - for sec_model_name in sos_config['sector_models']: + for model_name in sos_config['sector_models']: - sec_model_config = self.read_model(sec_model_name) - outputs = sec_model_config['outputs'] + model_config = self.read_model(model_name) + outputs = model_config['outputs'] for output, t in itertools.product(outputs, timesteps): - expected_results.append((t, 0, sec_model_name, output['name'])) + expected_results.append((t, 0, model_name, output['name'])) # Return as a set to remove duplicates return set(expected_results) @@ -967,6 +1008,218 @@ def canonical_missing_results(self, model_run_name): return self.canonical_expected_results( model_run_name) - self.canonical_available_results(model_run_name) + def _get_result_darray_internal(self, model_run_name, model_name, output_name, + time_decision_tuples): + """Internal implementation for `get_result_darray`, after the unique list of + (timestep, decision) tuples has been generated and validated. + + This method gets the spec for the output defined by the model_run_name, model_name + and output_name and expands the spec to include an additional dimension for the list of + tuples. + + Then, for each tuple, the data array from the corresponding read_results call is + stacked, and together with the new spec this information is returned as a new + DataArray. 
+ + Parameters + ---------- + model_run_name : str + model_name : str + output_name : str + time_decision_tuples : list of unique (timestep, decision) tuples + + Returns + ------- + DataArray with expanded spec and data for each (timestep, decision) tuple + """ + + # Get the output spec given the name of the sector model and output + output_spec = None + model = self.read_model(model_name) + + for output in model['outputs']: + + # Ignore if the output name doesn't match + if output_name != output['name']: + continue + + output_spec = Spec.from_dict(output) + + assert output_spec, "Output name was not found in model outputs" + + # Read the results for each (timestep, decision) tuple and stack them + list_of_numpy_arrays = [] + + for t, d in time_decision_tuples: + d_array = self.read_results(model_run_name, model_name, output_spec, t, d) + list_of_numpy_arrays.append(d_array.data) + + stacked_data = np.vstack(list_of_numpy_arrays) + data = np.transpose(stacked_data) + + # Add new dimensions to the data spec + output_dict = output_spec.as_dict() + output_dict['dims'].append('timestep_decision') + output_dict['coords']['timestep_decision'] = time_decision_tuples + + output_spec = Spec.from_dict(output_dict) + + # Create a new DataArray from the modified spec and stacked data + return DataArray(output_spec, np.reshape(data, output_spec.shape)) + + def get_result_darray(self, model_run_name, model_name, output_name, timesteps=None, + decision_iterations=None, time_decision_tuples=None): + """Return data for multiple timesteps and decision iterations for a given output from + a given sector model in a specific model run. 
+ + You can specify either: + a list of (timestep, decision) tuples + in which case data for all of those tuples matching the available results will + be returned + or: + a list of timesteps + in which case data for all of those timesteps (and any decision iterations) + matching the available results will be returned + or: + a list of decision iterations + in which case data for all of those decision iterations (and any timesteps) + matching the available results will be returned + or: + a list of timesteps and a list of decision iterations + in which case data for the Cartesian product of those timesteps and those + decision iterations matching the available results will be returned + or: + nothing + in which case all available results will be returned + + Then, for each tuple, the data array from the corresponding read_results call is + stacked, and together with the new spec this information is returned as a new + DataArray. + + Parameters + ---------- + model_run_name : str + model_name : str + output_name : str + timesteps : optional list of timesteps + decision_iterations : optional list of decision iterations + time_decision_tuples : optional list of unique (timestep, decision) tuples + + Returns + ------- + DataArray with expanded spec and the data requested + """ + available = self.available_results(model_run_name) + + # Build up the necessary list of tuples + if not timesteps and not decision_iterations and not time_decision_tuples: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name + ] + + elif timesteps and not decision_iterations and not time_decision_tuples: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and t in timesteps + ] + + elif decision_iterations and not timesteps and not time_decision_tuples: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and d in decision_iterations + ] + + 
elif time_decision_tuples and not timesteps and not decision_iterations: + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and (t, d) in time_decision_tuples + ] + + elif timesteps and decision_iterations and not time_decision_tuples: + t_d = list(itertools.product(timesteps, decision_iterations)) + list_of_tuples = [ + (t, d) for t, d, m, out in available + if m == model_name and out == output_name and (t, d) in t_d + ] + + else: + msg = "Expected either timesteps, or decisions, or (timestep, decision) " + \ + "tuples, or timesteps and decisions, or none of the above." + raise ValueError(msg) + + if not list_of_tuples: + raise SmifDataNotFoundError("None of the requested data is available.") + + return self._get_result_darray_internal( + model_run_name, model_name, output_name, sorted(list_of_tuples) + ) + + def get_results(self, + model_run_names: list, + model_name: str, + output_names: list, + timesteps: list = None, + decisions: list = None, + time_decision_tuples: list = None, + ): + """Return data for multiple timesteps and decision iterations for a given output from + a given sector model for multiple model runs. + + Parameters + ---------- + model_run_names: list[str] + the requested model run names + model_name: str + the requested sector model name + output_names: list[str] + the requested output names (output specs must all match) + timesteps: list[int] + the requested timesteps + decisions: list[int] + the requested decision iterations + time_decision_tuples: list[tuple] + a list of requested (timestep, decision) tuples + + Returns + ------- + dict + Nested dictionary of DataArray objects, keyed on model run name and output name. + Returned DataArrays include one extra (timestep, decision_iteration) dimension. 
+ """ + + # List the available output names and verify requested outputs match + outputs = self.read_model(model_name)['outputs'] + available_outputs = [output['name'] for output in outputs] + + for output_name in output_names: + assert output_name in available_outputs, \ + '{} is not an output of sector model {}.'.format(output_name, model_name) + + # The spec for each requested output must be the same. We check they have the same + # coordinates + coords = [Spec.from_dict(output).coords for output in outputs if + output['name'] in output_names] + + for coord in coords: + if coord != coords[0]: + raise ValueError('Different outputs must have the same coordinates') + + # Now actually obtain the requested results + results_dict = OrderedDict() # type: OrderedDict + for model_run_name in model_run_names: + results_dict[model_run_name] = OrderedDict() + for output_name in output_names: + results_dict[model_run_name][output_name] = self.get_result_darray( + model_run_name, + model_name, + output_name, + timesteps, + decisions, + time_decision_tuples + ) + return results_dict + # endregion # region data store utilities diff --git a/tests/data_layer/test_results.py b/tests/data_layer/test_results.py new file mode 100644 index 000000000..a93e1867b --- /dev/null +++ b/tests/data_layer/test_results.py @@ -0,0 +1,296 @@ +"""Test the Results interface +""" + +import os +from collections import OrderedDict + +import numpy as np +import pandas as pd +from pytest import fixture, raises +from smif.data_layer import DataArray, Results +from smif.exception import SmifDataNotFoundError +from smif.metadata import Spec + + +@fixture +def results_no_results(empty_store): + """Results fixture with two model runs and no results + """ + empty_store.write_dimension({ + 'name': 'sample_dim', + 'elements': [{'name': 'a'}, {'name': 'b'}] + }) + sample_output = { + 'name': 'sample_output', + 'dtype': 'float', + 'dims': ['sample_dim'], + 'coords': {'sample_dim': [{'name': 'a'}, {'name': 
'b'}]}, + 'unit': 'm' + } + empty_store.write_model({ + 'name': 'a_model', + 'description': "Sample model", + 'classname': 'DoesNotExist', + 'path': '/dev/null', + 'inputs': [], + 'outputs': [sample_output], + 'parameters': [], + 'interventions': [], + 'initial_conditions': [] + }) + empty_store.write_model({ + 'name': 'b_model', + 'description': "Second sample model", + 'classname': 'DoesNotExist', + 'path': '/dev/null', + 'inputs': [], + 'outputs': [sample_output], + 'parameters': [], + 'interventions': [], + 'initial_conditions': [] + }) + empty_store.write_sos_model({ + 'name': 'a_sos_model', + 'description': 'Sample SoS', + 'sector_models': ['a_model', 'b_model'], + 'scenarios': [], + 'scenario_dependencies': [], + 'model_dependencies': [], + 'narratives': [] + }) + empty_store.write_model_run({ + 'name': 'model_run_1', + 'description': 'Sample model run', + 'timesteps': [2010, 2015, 2020, 2025, 2030], + 'sos_model': 'a_sos_model', + 'scenarios': {}, + 'strategies': [], + 'narratives': {} + }) + empty_store.write_model_run({ + 'name': 'model_run_2', + 'description': 'Sample model run', + 'timesteps': [2010, 2015, 2020, 2025, 2030], + 'sos_model': 'a_sos_model', + 'scenarios': {}, + 'strategies': [], + 'narratives': {} + }) + + return Results(store=empty_store) + + +@fixture +def results_with_results(results_no_results): + + sample_output = { + 'name': 'sample_output', + 'dtype': 'float', + 'dims': ['sample_dim'], + 'coords': {'sample_dim': [{'name': 'a'}, {'name': 'b'}]}, + 'unit': 'm' + } + + spec = Spec.from_dict(sample_output) + data = np.zeros((2,), dtype=float) + sample_results = DataArray(spec, data) + + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2010, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 
'a_model', 2015, 1) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 1) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2015, 2) + results_no_results._store.write_results(sample_results, 'model_run_1', 'a_model', 2020, 2) + + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2010, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2015, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2020, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2025, 0) + results_no_results._store.write_results(sample_results, 'model_run_1', 'b_model', 2030, 0) + + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2010, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2015, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2020, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2025, 0) + results_no_results._store.write_results(sample_results, 'model_run_2', 'b_model', 2030, 0) + + return results_no_results + + +class TestNoResults: + + def test_exceptions(self, empty_store): + + # No arguments is not allowed + with raises(TypeError) as e: + Results() + assert "missing 1 required positional argument: 'store'" in str(e) + + # Check that constructing with just a store works fine + Results(store=empty_store) + + # Check that valid configurations do work (but expect a SmifDataNotFoundError + # because the store creation will fall over) + with raises(SmifDataNotFoundError) as e: + Results(store={'interface': 'local_csv', 'dir': '.'}) + assert 'Expected data folder' in str(e) + + with raises(SmifDataNotFoundError) as e: + Results(store={'interface': 'local_parquet', 'dir': '.'}) + assert 'Expected data folder' in str(e) + + # Interface 
left blank will default to local_csv + with raises(SmifDataNotFoundError) as e: + Results(store={'dir': '.'}) + assert 'Expected data folder' in str(e) + + # Dir left blank will default to '.' + with raises(SmifDataNotFoundError) as e: + Results(store={'interface': 'local_parquet'}) + assert 'Expected data folder' in str(e) + + # Invalid interface will raise a ValueError + with raises(ValueError) as e: + Results(store={'interface': 'invalid', 'dir': '.'}) + assert 'Unsupported interface "invalid"' in str(e) + + # Invalid directory will raise a ValueError + with raises(ValueError) as e: + invalid_dir = os.path.join(os.path.dirname(__file__), 'does', 'not', 'exist') + Results(store={'interface': 'local_csv', 'dir': invalid_dir}) + assert 'to be a valid directory' in str(e) + + def test_list_model_runs(self, results_no_results): + assert results_no_results.list_model_runs() == ['model_run_1', 'model_run_2'] + + def test_list_no_model_runs(self, empty_store): + # Should be no model runs in an empty Results() + results = Results(store=empty_store) + assert results.list_model_runs() == [] + + def test_available_results(self, results_no_results): + available = results_no_results.available_results('model_run_1') + + assert available['model_run'] == 'model_run_1' + assert available['sos_model'] == 'a_sos_model' + assert available['sector_models'] == {} + + +class TestSomeResults: + + def test_available_results(self, results_with_results): + + available = results_with_results.available_results('model_run_1') + + assert available['model_run'] == 'model_run_1' + assert available['sos_model'] == 'a_sos_model' + + sec_models = available['sector_models'] + assert sorted(sec_models.keys()) == ['a_model', 'b_model'] + + # Check a_model outputs are correct + outputs_a = sec_models['a_model']['outputs'] + assert sorted(outputs_a.keys()) == ['sample_output'] + + output_answer_a = {0: [2010, 2015, 2020], 1: [2015, 2020], 2: [2015, 2020]} + assert outputs_a['sample_output'] == 
output_answer_a + + # Check b_model outputs are correct + outputs_b = sec_models['b_model']['outputs'] + assert sorted(outputs_b.keys()) == ['sample_output'] + + output_answer_b = {0: [2010, 2015, 2020, 2025, 2030]} + assert outputs_b['sample_output'] == output_answer_b + + available = results_with_results.available_results('model_run_2') + + assert available['model_run'] == 'model_run_2' + assert available['sos_model'] == 'a_sos_model' + + sec_models = available['sector_models'] + assert sorted(sec_models.keys()) == ['b_model'] + + # Check b_model outputs are correct + outputs = sec_models['b_model']['outputs'] + assert sorted(outputs_a.keys()) == ['sample_output'] + + output_answer = {0: [2010, 2015, 2020, 2025, 2030]} + assert outputs['sample_output'] == output_answer + + def test_read_validate_names(self, results_with_results): + + # Passing anything other than exactly one sector model is currently not implemented + with raises(NotImplementedError) as e: + results_with_results.read( + model_run_names=['model_run_1', 'model_run_2'], + model_names=[], + output_names=['sample_output'] + ) + assert 'requires exactly one sector model' in str(e.value) + + with raises(NotImplementedError) as e: + results_with_results.read( + model_run_names=['model_run_1', 'model_run_2'], + model_names=['a_model', 'b_model'], + output_names=['one'] + ) + assert 'requires exactly one sector model' in str(e.value) + + with raises(ValueError) as e: + results_with_results.read( + model_run_names=[], + model_names=['a_model'], + output_names=['sample_output'] + ) + assert 'requires at least one sector model name' in str(e.value) + + with raises(ValueError) as e: + results_with_results.read( + model_run_names=['model_run_1'], + model_names=['a_model'], + output_names=[] + ) + assert 'requires at least one output name' in str(e.value) + + def test_read(self, results_with_results): + + # Read one model run and one output + results_data = results_with_results.read( + 
model_run_names=['model_run_1'], + model_names=['a_model'], + output_names=['sample_output'] + ) + + expected = pd.DataFrame( + OrderedDict([ + ('model_run', 'model_run_1'), + ('timestep', [2010, 2015, 2015, 2015, 2020, 2020, 2020, + 2010, 2015, 2015, 2015, 2020, 2020, 2020]), + ('decision', [0, 0, 1, 2, 0, 1, 2, 0, 0, 1, 2, 0, 1, 2]), + ('sample_dim', ['a', 'a', 'a', 'a', 'a', 'a', 'a', + 'b', 'b', 'b', 'b', 'b', 'b', 'b']), + ('sample_output', 0.0), + ]) + ) + + pd.testing.assert_frame_equal(results_data, expected) + + # Read two model runs and one output + results_data = results_with_results.read( + model_run_names=['model_run_1', 'model_run_2'], + model_names=['b_model'], + output_names=['sample_output'] + ) + + expected = pd.DataFrame( + OrderedDict([ + ('model_run', ['model_run_1'] * 10 + ['model_run_2'] * 10), + ('timestep', [2010, 2015, 2020, 2025, 2030] * 4), + ('decision', 0), + ('sample_dim', ['a'] * 5 + ['b'] * 5 + ['a'] * 5 + ['b'] * 5), + ('sample_output', 0.0), + ]) + ) + + pd.testing.assert_frame_equal(results_data, expected) diff --git a/tests/data_layer/test_store.py b/tests/data_layer/test_store.py index a42e33959..b34b2537e 100644 --- a/tests/data_layer/test_store.py +++ b/tests/data_layer/test_store.py @@ -382,3 +382,12 @@ def test_canonical_missing_results( missing_results.remove((2015, 0, 'energy_demand', 'gas_demand')) assert(store.canonical_missing_results(model_run['name']) == missing_results) + + def test_get_results(self): + # This is difficult to test without fixtures defining an entire canonical project. + # See smif issue #304 (https://github.com/nismod/smif/issues/304). + # Todo: mock a store with known results that can be obtained with get_results(...) + # This requires a model run with sector model, and a sector model with valid inputs and + # outputs, and results with valid spec, etc. Some of this functionality exists in + # fixtures provided in `conftest.py`. + pass