From 5c3834d7f06906989c9fe93f1c60974bf5c3fd19 Mon Sep 17 00:00:00 2001 From: Bjarne-55 <73470930+Bjarne-55@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:46:35 +0200 Subject: [PATCH] Add infoclass (#153) * Fix calculation of average Return * Add required properties * Add cloning of observation to prevent bug * Add ExtraInfo * Fix Bug in Multiprocess Environment * Reset_all now returns real info dictionary * Add assert to prevent None Environment --- examples/isaac_example.py | 8 +- mushroom_rl/core/__init__.py | 4 +- mushroom_rl/core/array_backend.py | 86 +++- mushroom_rl/core/core.py | 2 + mushroom_rl/core/dataset.py | 45 +- mushroom_rl/core/extra_info.py | 412 +++++++++++++++++ mushroom_rl/core/multiprocess_environment.py | 5 +- mushroom_rl/environments/isaac_env.py | 13 +- tests/core/test_dataset.py | 4 + tests/core/test_extra_info.py | 453 +++++++++++++++++++ 10 files changed, 1001 insertions(+), 31 deletions(-) create mode 100644 mushroom_rl/core/extra_info.py create mode 100644 tests/core/test_extra_info.py diff --git a/examples/isaac_example.py b/examples/isaac_example.py index 5e87af8d..5f9728af 100644 --- a/examples/isaac_example.py +++ b/examples/isaac_example.py @@ -79,8 +79,8 @@ def experiment(cfg_dict, headless, alg, n_epochs, n_steps, n_steps_per_fit, n_ep dataset = core.evaluate(n_episodes=n_episodes_test, render=False) - J = torch.mean(torch.stack(dataset.discounted_return)) - R = torch.mean(torch.stack(dataset.undiscounted_return)) + J = torch.mean(dataset.discounted_return) + R = torch.mean(dataset.undiscounted_return) E = agent.policy.entropy() logger.epoch_info(0, J=J, R=R, entropy=E) @@ -89,8 +89,8 @@ def experiment(cfg_dict, headless, alg, n_epochs, n_steps, n_steps_per_fit, n_ep core.learn(n_steps=n_steps, n_steps_per_fit=n_steps_per_fit) dataset = core.evaluate(n_episodes=n_episodes_test, render=False) - J = torch.mean(torch.stack(dataset.discounted_return)) - R = torch.mean(torch.stack(dataset.undiscounted_return)) + J = torch.mean(dataset.discounted_return) + R = torch.mean(dataset.undiscounted_return) E = agent.policy.entropy() logger.epoch_info(it+1, J=J, R=R, entropy=E) diff --git a/mushroom_rl/core/__init__.py b/mushroom_rl/core/__init__.py index 814fc9f7..7f1e51d5 100644 --- a/mushroom_rl/core/__init__.py +++ b/mushroom_rl/core/__init__.py @@ -6,6 +6,8 @@ from .serialization import Serializable from .logger import Logger +from .extra_info import ExtraInfo + from .vectorized_core import VectorCore from .vectorized_env import VectorizedEnvironment from .multiprocess_environment import MultiprocessEnvironment @@ -13,4 +15,4 @@ import mushroom_rl.environments __all__ = ['ArrayBackend', 'Core', 'DatasetInfo', 'Dataset', 'Environment', 'MDPInfo', 'Agent', 'AgentInfo', - 'Serializable', 'Logger', 'VectorCore', 'VectorizedEnvironment', 'MultiprocessEnvironment'] + 'Serializable', 'Logger', 'ExtraInfo', 'VectorCore', 'VectorizedEnvironment', 'MultiprocessEnvironment'] diff --git a/mushroom_rl/core/array_backend.py b/mushroom_rl/core/array_backend.py index c3b6afdf..706357c4 100644 --- a/mushroom_rl/core/array_backend.py +++ b/mushroom_rl/core/array_backend.py @@ -147,6 +147,26 @@ def from_list(array): @staticmethod def pack_padded_sequence(array, mask): raise NotImplementedError + + @staticmethod + def flatten(array): + raise NotImplementedError + + @staticmethod + def empty(shape, device=None): + raise NotImplementedError + + @staticmethod + def none(): + raise NotImplementedError + + @staticmethod + def shape(array): + raise NotImplementedError + + 
@staticmethod + def full(shape, value): + raise NotImplementedError class NumpyBackend(ArrayBackend): @@ -253,6 +273,28 @@ def pack_padded_sequence(array, mask): new_shape = (shape[0] * shape[1],) + shape[2:] return array.reshape(new_shape, order='F')[mask.flatten(order='F')] + + @staticmethod + def flatten(array): + shape = array.shape + new_shape = (shape[0] * shape[1],) + shape[2:] + return array.reshape(new_shape, order='F') + + @staticmethod + def empty(shape, device=None): + return np.empty(shape) + + @staticmethod + def none(): + return np.nan + + @staticmethod + def shape(array): + return array.shape + + @staticmethod + def full(shape, value): + return np.full(shape, value) class TorchBackend(ArrayBackend): @@ -364,9 +406,31 @@ def pack_padded_sequence(array, mask): shape = array.shape new_shape = (shape[0]*shape[1], ) + shape[2:] - + return array.transpose(0, 1).reshape(new_shape)[mask.transpose(0, 1).flatten()] + @staticmethod + def flatten(array): + shape = array.shape + new_shape = (shape[0]*shape[1], ) + shape[2:] + return array.transpose(0, 1).reshape(new_shape) + + @staticmethod + def empty(shape, device=None): + device = TorchUtils.get_device() if device is None else device + return torch.empty(shape, device=device) + + @staticmethod + def none(): + return torch.nan + + @staticmethod + def shape(array): + return array.shape + + @staticmethod + def full(shape, value): + return torch.full(shape, value) class ListBackend(ArrayBackend): @@ -421,3 +485,23 @@ def from_list(array): @staticmethod def pack_padded_sequence(array, mask): return NumpyBackend.pack_padded_sequence(array, np.array(mask)) + + @staticmethod + def flatten(array): + return NumpyBackend.flatten(array) + + @staticmethod + def empty(shape, device=None): + return np.empty(shape) + + @staticmethod + def none(): + return None + + @staticmethod + def shape(array): + return np.array(array).shape + + @staticmethod + def full(shape, value): + return np.full(shape, value) \ No newline at end of file diff --git a/mushroom_rl/core/core.py b/mushroom_rl/core/core.py index 3dffe448..3478029c 100644 --- a/mushroom_rl/core/core.py +++ b/mushroom_rl/core/core.py @@ -128,6 +128,8 @@ def _run(self, dataset, n_steps, n_episodes, render, quiet, record, initial_stat self._end(record) + dataset.info.parse() + dataset.episode_info.parse() return dataset def _step(self, render, record): diff --git a/mushroom_rl/core/dataset.py b/mushroom_rl/core/dataset.py index ab903b0b..c11fd03b 100644 --- a/mushroom_rl/core/dataset.py +++ b/mushroom_rl/core/dataset.py @@ -6,6 +6,7 @@ from mushroom_rl.core.serialization import Serializable from .array_backend import ArrayBackend +from .extra_info import ExtraInfo from ._impl import * @@ -103,8 +104,8 @@ def __init__(self, dataset_info, n_steps=None, n_episodes=None): else: policy_state_shape = None - self._info = defaultdict(list) - self._episode_info = defaultdict(list) + self._info = ExtraInfo(dataset_info.n_envs, dataset_info.backend, dataset_info.device) + self._episode_info = ExtraInfo(dataset_info.n_envs, dataset_info.backend, dataset_info.device) self._theta_list = list() if dataset_info.backend == 'numpy': @@ -195,12 +196,12 @@ def from_array(cls, states, actions, rewards, next_states, absorbings, lasts, dataset = cls.create_raw_instance() if info is None: - dataset._info = defaultdict(list) + dataset._info = ExtraInfo(1, backend) else: dataset._info = info.copy() if episode_info is None: - dataset._episode_info = defaultdict(list) + dataset._episode_info = ExtraInfo(1, backend) else: 
dataset._episode_info = episode_info.copy() @@ -228,7 +229,7 @@ def from_array(cls, states, actions, rewards, next_states, absorbings, lasts, def append(self, step, info): self._data.append(*step) - self._append_info(self._info, info) + self._info.append(info) def append_episode_info(self, info): self._append_info(self._episode_info, info) @@ -243,21 +244,17 @@ def get_info(self, field, index=None): return self._info[field][index] def clear(self): - self._episode_info = defaultdict(list) + self._episode_info.clear() self._theta_list = list() - self._info = defaultdict(list) + self._info.clear() self._data.clear() def get_view(self, index, copy=False): dataset = self.create_raw_instance(dataset=self) - info_slice = defaultdict(list) - for key in self._info.keys(): - info_slice[key] = self._info[key][index] - - dataset._info = info_slice - dataset._episode_info = defaultdict(list) + dataset._info = self._info.get_view(index, copy) + dataset._episode_info = self._episode_info.get_view(index, copy) dataset._data = self._data.get_view(index, copy) return dataset @@ -276,11 +273,9 @@ def __getitem__(self, index): def __add__(self, other): result = self.create_raw_instance(dataset=self) - new_info = self._merge_info(self.info, other.info) - new_episode_info = self._merge_info(self.episode_info, other.episode_info) - result._info = new_info - result._episode_info = new_episode_info + result._info = self._info + other._info + result._episode_info = self._episode_info + other._episode_info result._theta_list = self._theta_list + other._theta_list result._data = self._data + other._data @@ -525,8 +520,8 @@ def _convert(self, *arrays, to='numpy'): def _add_all_save_attr(self): self._add_save_attr( - _info='pickle', - _episode_info='pickle', + _info='mushroom', + _episode_info='mushroom', _theta_list='pickle', _data='mushroom', _array_backend='primitive', @@ -557,7 +552,7 @@ def append(self, step, info): def append_vectorized(self, step, info, mask): self._data.append(*step, mask=mask) - self._append_info(self._info, {}) # FIXME: handle properly info + self._info.append(info) def append_theta_vectorized(self, theta, mask): for i in range(len(theta)): @@ -581,11 +576,16 @@ def clear(self, n_steps_per_fit=None): mask.flatten()[n_extra_steps:] = False residual_data.mask = mask.reshape(original_shape) + residual_info = self._info.get_view(view_size, copy=True) + residual_episode_info = self._episode_info.get_view(view_size, copy=True) + super().clear() self._initialize_theta_list(n_envs) if n_steps_per_fit is not None and residual_data is not None: self._data = residual_data + self._info = residual_info + self._episode_info = residual_episode_info def flatten(self, n_steps_per_fit=None): if len(self) == 0: @@ -622,9 +622,12 @@ def flatten(self, n_steps_per_fit=None): flat_theta_list = self._flatten_theta_list() + flat_info = self._info.flatten(self.mask) + flat_episode_info = self._episode_info.flatten(self.mask) + return Dataset.from_array(states, actions, rewards, next_states, absorbings, lasts, policy_state=policy_state, policy_next_state=policy_next_state, - info=None, episode_info=None, theta_list=flat_theta_list, # FIXME: handle properly info + info=flat_info, episode_info=flat_episode_info, theta_list=flat_theta_list, horizon=self._dataset_info.horizon, gamma=self._dataset_info.gamma, backend=self._array_backend.get_backend_name()) diff --git a/mushroom_rl/core/extra_info.py b/mushroom_rl/core/extra_info.py new file mode 100644 index 00000000..1b1faf75 --- /dev/null +++ 
b/mushroom_rl/core/extra_info.py @@ -0,0 +1,412 @@ +from collections import UserDict +import numbers +from .array_backend import ArrayBackend +from mushroom_rl.core.serialization import Serializable + +class ExtraInfo(Serializable, UserDict): + """ + A class to collect and parse step information + """ + def __init__(self, n_envs, backend, device=None): + """ + Constructor. + + Args: + n_envs (int): Number of parallel environments + """ + self._n_envs = n_envs + self._array_backend = ArrayBackend.get_array_backend(backend) + self._device = device + + self._storage = [] + self._key_mapping = {} #maps keys for future output to key paths + self._shape_mapping = {} #maps keys to additional shapes for arrays + self._structured_storage = {} + super().__init__() + self._add_all_save_attr() + + def append(self, info): + """ + Append new step information + + Args: + info (dict or list): Information to append; either a list with one dict per environment, or a dictionary of arrays + """ + if self._n_envs > 1: + assert isinstance(info, (dict, list)) + else: + assert isinstance(info, dict) + + self._storage.append(info) + + def parse(self, to=None): + """ + Parse the stored information into a flat dictionary of arrays + + Args: + to (str): the backend to be used for the returned arrays, 'torch' or 'numpy'. + + Returns: + dict: Flat dictionary containing an array for every property of the step information + """ + if to is None: + to = self._array_backend.get_backend_name() + + #create key mapping + for step_data in self._storage: + if isinstance(step_data, dict): + self._update_key_mapping(step_data, self._n_envs == 1) + elif isinstance(step_data, list): + for env_data in step_data: + assert isinstance(env_data, dict) + self._update_key_mapping(env_data, True) + + # calculate the size for the array + if self._structured_storage: + length_structured_storage = self._structured_storage[next(iter(self._structured_storage.keys()))].shape[0] + else: + length_structured_storage = 0 + size = (len(self._storage) + length_structured_storage, self._n_envs) if self._n_envs > 1 else (len(self._storage) + length_structured_storage, ) + + #create output dictionary with empty arrays + output = { + key: ArrayBackend.get_array_backend(to).empty(size + self._shape_mapping[key], self._device) + for key in self._key_mapping + } + + #fill output with elements stored in structured storage + if self._structured_storage: + for key in output: + index = length_structured_storage + value = self._convert(self._structured_storage[key], to) + output[key][:index] = value + + #fill output with elements stored in storage + for index, step_data in enumerate(self._storage): + index = index + length_structured_storage + if isinstance(step_data, dict): + self._append_dict_to_output(output, step_data, index, to) + elif isinstance(step_data, list): + self._append_list_to_output(output, step_data, index, to) + + self._structured_storage = {key: value for key, value in output.items()} + self._storage = [] + self._array_backend = ArrayBackend.get_array_backend(to) + + self.data = output + + def flatten(self, mask=None): + """ + Flattens the arrays in data by combining the first two dimensions.
+ + Args: + mask (array, None): Optional boolean mask selecting the valid steps to keep while flattening + + Returns: + ExtraInfo: Flattened ExtraInfo + """ + self.parse() + + info = ExtraInfo(1, self._array_backend.get_backend_name(), self._device) + info._shape_mapping = self._shape_mapping + info._key_mapping = self._key_mapping + info._structured_storage = {} + + for key in self.data: + if mask is None: + info.data[key] = info._array_backend.flatten(self.data[key]) + else: + info.data[key] = info._array_backend.pack_padded_sequence(self.data[key], mask) + + for key in self._structured_storage: + if mask is None: + info._structured_storage[key] = info._array_backend.flatten(self._structured_storage[key]) + else: + info._structured_storage[key] = info._array_backend.pack_padded_sequence(self._structured_storage[key], mask) + + return info + + def __add__(self, other): + """ + Returns a new object which combines two ExtraInfo objects. + + Args: + other (ExtraInfo): The other ExtraInfo to combine with self + """ + assert(self._n_envs == other.n_envs) + + info = ExtraInfo(self._n_envs, self._array_backend.get_backend_name(), self._device) + info._storage = self._storage + other._storage + + info._structured_storage = self._concatenate_dictionary(self._structured_storage, other._structured_storage, self._array_backend, other._array_backend) + info.data = self._concatenate_dictionary(self.data, other.data, self._array_backend, other._array_backend) + + #combine key_mapping + info._key_mapping = self._key_mapping.copy() + info._key_mapping.update(other._key_mapping) + + #combine shape_mapping + info._shape_mapping = self._shape_mapping.copy() + info._shape_mapping.update(other._shape_mapping) + + return info + + def _concatenate_array(self, array1, array2, intended_length_array1, intended_length_array2, array1_backend, array2_backend): + """ + Concatenate array1 with array2 + + Args: + array1 (array, None) + array2 (array, None) + intended_length_array1 (int): Intended length of array1 in case array1 is None + intended_length_array2 (int): Intended length of array2 in case array2 is None + array1_backend (ArrayBackend): Backend of array1 + array2_backend (ArrayBackend): Backend of array2 + + Returns: + array: Concatenation of array1 and array2 + """ + if array1 is None: + shape = (intended_length_array1,) + array2_backend.shape(array2)[1:] + array1 = array1_backend.full(shape, array1_backend.none()) + if array2 is None: + shape = (intended_length_array2, ) + array1_backend.shape(array1)[1:] + array2 = array2_backend.full(shape, array2_backend.none()) + array2 = array1_backend.convert(array2, backend=array2_backend) + return array1_backend.concatenate((array1, array2)) + + def _concatenate_dictionary(self, dict1, dict2, backend1, backend2): + """ + Concatenate dict1 with dict2. + + Args: + dict1 (dict): Flat dictionary containing arrays of backend1 + dict2 (dict): Flat dictionary containing arrays of backend2 + backend1 (ArrayBackend): Backend of arrays in dict1. + backend2 (ArrayBackend): Backend of arrays in dict2.
+ + Returns: + dict: Concatenation of dict1 and dict2 + """ + if not dict1: + return dict2 + if not dict2: + return dict1 + + array_length_dict1 = backend1.shape(dict1[next(iter(dict1.keys()))])[0] + array_length_dict2 = backend2.shape(dict2[next(iter(dict2.keys()))])[0] + + r = {} + + for key in dict1.keys() | dict2.keys(): + array1 = dict1[key] if key in dict1 else None + array2 = dict2[key] if key in dict2 else None + r[key] = self._concatenate_array(array1, array2, array_length_dict1, array_length_dict2, backend1, backend2) + return r + + + def copy(self): + info = ExtraInfo(self._n_envs, self._array_backend.get_backend_name(), self._device) + info._storage = self._storage.copy() + info._key_mapping = self._key_mapping.copy() + info._shape_mapping = self._shape_mapping.copy() + info.data = self.data.copy() + + return info + + def get_view(self, index, copy=False): + """ + Returns an ExtraInfo object which only contains the specified indices + + Args: + index (int, slice, ndarray, tensor): indices that the returned object should contain + copy (bool): whether the content of the ExtraInfo object should be copied + """ + self.parse() + info = ExtraInfo(self._n_envs, self._array_backend.get_backend_name(), self._device) + info._key_mapping = self._key_mapping + info._shape_mapping = self._shape_mapping + + if not copy: + info._structured_storage = {key: value[index, ...] for key, value in self._structured_storage.items()} + info.data = {key: value[index, ...] for key, value in self.data.items()} + else: + for key, value in self._structured_storage.items(): + value = value[index, ...] + info._structured_storage[key] = self._array_backend.empty(value.shape, self._device) + info._structured_storage[key][:] = value + + for key, value in self.data.items(): + value = value[index, ...] + info.data[key] = self._array_backend.empty(value.shape, self._device) + info.data[key][:] = value + + return info + + def clear(self): + self._storage = [] + self._key_mapping = {} + self._shape_mapping = {} + self._structured_storage = {} + self.data = {} + + def _add_all_save_attr(self): + self._add_save_attr( + data='primitive', + _storage='primitive', + _structured_storage='primitive', + _key_mapping='primitive', + _shape_mapping='primitive' + ) + + def _update_key_mapping(self, template, single_env): + """ + Update the key mapping and the key paths with the keys from the given template + + Args: + template (dict): Dictionary to extract the keys from + single_env (bool): Whether the template contains data for only one environment + """ + assert(isinstance(template, dict)) + + # Stack to store dictionaries and their parent key + stack = [(template, [])] + + while stack: + structure_element, parent_keys = stack.pop() + assert isinstance(structure_element, dict) + + #Iterate over the dict + for key, value in structure_element.items(): + key_path = parent_keys + [key] + + # skip if key is already in key_mapping + if key_path in self._key_mapping.values(): + continue + + if isinstance(value, dict): + stack.append((value, key_path)) + else: + new_key = self._create_key(key_path) + self._store_array_shape(new_key, value, single_env) + + def _append_dict_to_output(self, output, step_data, index, to): + """ + Append a dictionary to the output arrays.
+ + Args: + output (dict): Flat dictionary containing the arrays + step_data (dict): Dictionary containing the step information for one step + index (int): index of the step + to (str): Target format + """ + for key, key_path in self._key_mapping.items(): + value = self._find_element_by_key_path(step_data, key_path) + value = self._convert(value, to) + output[key][index] = value + + def _append_list_to_output(self, output, step_data, index, to): + """ + Append a list to the output arrays. + + Args: + output (dict): Flat dictionary containing the arrays + step_data (list): List containing the step information as a dictionary for every environment + index (int): index of the step + to (str): Target format + """ + assert(self._n_envs > 1) + for key, key_path in self._key_mapping.items(): + for i, env_data in enumerate(step_data): + value = self._find_element_by_key_path(env_data, key_path) + value = self._convert(value, to) + output[key][index][i] = value + + def _find_element_by_key_path(self, source, key_path): + """ + Find the value in source corresponding to the key path. + + Args: + source (dict): Dictionary to search in. + key_path (list): List of keys. + + Returns: + The found value or None if any key is missing. + """ + current = source + for key in key_path: + if key in current: + current = current[key] + else: + return None + return current + + def _convert(self, value, to): + """ + Convert value to the target format. + + Args: + value: Value to convert. + to (str): Target format, 'torch' or 'numpy'. + + Returns: + Converted value. + """ + if isinstance(value, numbers.Number): + return value + + if value is None: + return ArrayBackend.get_array_backend(to).none() + + return ArrayBackend.convert(value, to=to, backend=self._array_backend) + + def _create_key(self, key_path): + """ + Creates a single flat key from a list of keys. + + Args: + key_path (list): List of keys to combine. + + Returns: + key (str): Created key. + """ + key = "_".join(str(key) for key in key_path) + self._key_mapping[key] = key_path + return key + + def _store_array_shape(self, key, value, single_env): + """ + Stores the shape of the value. If value does not have a shape, an empty tuple is stored. + + Args: + key (str): Dictionary key.
+ value (Array, Number): Variable whose shape should be saved + single_env (bool): Whether the value contains data for only one environment + """ + if isinstance(value, numbers.Number): + self._shape_mapping[key] = () + else: + shape = self._array_backend.shape(value) + self._shape_mapping[key] = shape[1:] if not single_env else shape + + @property + def n_envs(self): + return self._n_envs + + def __setitem__(self, key, value): + raise TypeError("This dictionary is read-only.") + + def __delitem__(self, key): + raise TypeError("This dictionary is read-only.") + + def pop(self, key, default=None): + raise TypeError("This dictionary is read-only.") + + def popitem(self): + raise TypeError("This dictionary is read-only.") + + def setdefault(self, key, default=None): + raise TypeError("This dictionary is read-only.") + + def update(self, *args, **kwargs): + raise TypeError("This dictionary is read-only.") \ No newline at end of file diff --git a/mushroom_rl/core/multiprocess_environment.py b/mushroom_rl/core/multiprocess_environment.py index b8593d8b..6f566851 100644 --- a/mushroom_rl/core/multiprocess_environment.py +++ b/mushroom_rl/core/multiprocess_environment.py @@ -66,6 +66,7 @@ def __init__(self, env_class, *args, n_envs=-1, use_generator=False, **kwargs): **kwargs: keyword arguments to set to the constructor or to the generator; """ + assert env_class is not None, "Environment class is None, probably because a required module is not installed." assert n_envs > 1 or n_envs == -1 if n_envs == -1: @@ -107,7 +108,7 @@ def reset_all(self, env_mask, state=None): else: episode_infos.append({}) - return self._states, episode_infos + return self._states.copy(), episode_infos.copy() def step_all(self, env_mask, action): for i, remote in enumerate(self._remotes): @@ -129,7 +130,7 @@ def step_all(self, env_mask, action): else: step_infos.append({}) - return self._states.copy(), rewards, absorbings, step_infos + return self._states.copy(), rewards.copy(), absorbings.copy(), step_infos.copy() def render_all(self, env_mask, record=False): for i, remote in enumerate(self._remotes): diff --git a/mushroom_rl/environments/isaac_env.py b/mushroom_rl/environments/isaac_env.py index 6215090c..ec94f158 100644 --- a/mushroom_rl/environments/isaac_env.py +++ b/mushroom_rl/environments/isaac_env.py @@ -90,8 +90,9 @@ def reset_all(self, env_mask, state=None): self._task.reset_idx(idxs) # self._world.step(render=self._render) # TODO Check if we can do otherwise task_obs = self._task.get_observations() + task_extras = self._task.get_extras() observation = convert_task_observation(task_obs) - return observation, [{}]*self._n_envs + return observation.clone(), [task_extras]*self._n_envs def step_all(self, env_mask, action): self._task.pre_physics_step(action) @@ -106,7 +107,7 @@ def step_all(self, env_mask, action): env_mask_cuda = torch.as_tensor(env_mask).cuda() - return observation, reward, torch.logical_and(done, env_mask_cuda), [info]*self._n_envs + return observation.clone(), reward, torch.logical_and(done, env_mask_cuda), [info]*self._n_envs def render_all(self, env_mask, record=False): self._world.render() @@ -132,3 +133,11 @@ def _convert_gym_space(space): return Box(low=space.low, high=space.high, shape=space.shape) else: raise ValueError + + @property + def world(self): + return self._world + + @property + def render_enabled(self): + return self._render \ No newline at end of file diff --git a/tests/core/test_dataset.py b/tests/core/test_dataset.py index e55a8ac1..c99c6bfa 100644 --- a/tests/core/test_dataset.py +++ b/tests/core/test_dataset.py @@ -126,4 +126,8 @@ def
test_dataset_loading(tmpdir): assert dataset._dataset_info.gamma == new_dataset._dataset_info.gamma + assert len(dataset.info) == len(new_dataset.info) + for key in dataset.info: + assert np.array_equal(dataset.info[key], new_dataset.info[key]) + diff --git a/tests/core/test_extra_info.py b/tests/core/test_extra_info.py new file mode 100644 index 00000000..51c22b25 --- /dev/null +++ b/tests/core/test_extra_info.py @@ -0,0 +1,453 @@ +from mushroom_rl.core import ExtraInfo +import torch +import numpy as np + +def test_list_of_dict(): + info = ExtraInfo(6, 'numpy') + + data = [] + for i in range(6): + single_step_data = { + 'prop1': 100 + i, + 'prop2': np.arange(300 + i, 300 + i + 0.5, 0.1), + 'prop3': { + 'x': 400 + i, + 'y': 500 + i + } + } + data.append(single_step_data) + + data2 = [] + for i in range(6): + single_step_data = { + 'prop1': 110 + i, + 'prop2': np.arange(310 + i, 310 + i + 0.5, 0.1), + 'prop3': { + 'x': 410 + i, + 'y': 510 + i + } + } + data2.append(single_step_data) + + info.append(data) + info.append(data2) + + info.parse(to='torch') + + assert(len(info) == 4) + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop2"])) + assert(torch.is_tensor(info["prop3_x"])) + assert(torch.is_tensor(info["prop3_y"])) + assert(info["prop1"].dim() == 2 and info["prop1"].size(0) == 2 and info["prop1"].size(1) == 6) + assert(info["prop2"].dim() == 3 and info["prop2"].size(0) == 2 and info["prop2"].size(1) == 6 and info["prop2"].size(2) == 5) + assert(info["prop3_x"].dim() == 2 and info["prop3_x"].size(0) == 2 and info["prop3_x"].size(1) == 6) + assert(info["prop3_y"].dim() == 2 and info["prop3_y"].size(0) == 2 and info["prop3_y"].size(1) == 6) + + info = info.flatten() + + assert(len(info) == 4) + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop2"])) + assert(torch.is_tensor(info["prop3_x"])) + assert(torch.is_tensor(info["prop3_y"])) + assert(info["prop1"].dim() == 1 and info["prop1"].size(0) == 12) + assert(info["prop2"].dim() == 2 and info["prop2"].size(0) == 12 and info["prop2"].size(1) == 5) + assert(info["prop3_x"].dim() == 1 and info["prop3_x"].size(0) == 12) + assert(info["prop3_y"].dim() == 1 and info["prop3_y"].size(0) == 12) + + prop1 = torch.tensor([100, 110, 101, 111, 102, 112, 103, 113, 104, 114, 105, 115]) + prop3_x = torch.tensor([400, 410, 401, 411, 402, 412, 403, 413, 404, 414, 405, 415]) + prop3_y = torch.tensor([500, 510, 501, 511, 502, 512, 503, 513, 504, 514, 505, 515]) + assert torch.equal(prop1, info["prop1"]) + assert torch.equal(prop3_x, info["prop3_x"]) + assert torch.equal(prop3_y, info["prop3_y"]) + + info.parse(to='torch') + + assert(len(info) == 4) + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop2"])) + assert(torch.is_tensor(info["prop3_x"])) + assert(torch.is_tensor(info["prop3_y"])) + assert(info["prop1"].dim() == 1 and info["prop1"].size(0) == 12) + assert(info["prop2"].dim() == 2 and info["prop2"].size(0) == 12 and info["prop2"].size(1) == 5) + assert(info["prop3_x"].dim() == 1 and info["prop3_x"].size(0) == 12) + assert(info["prop3_y"].dim() == 1 and info["prop3_y"].size(0) == 12) + +def test_dict_of_torch(): + info = ExtraInfo(4, 'torch') + data1 = { + 'prop1': torch.arange(100, 104), + 'prop2': torch.tensor([[200.0, 200.5], [201.0, 201.5], [202.0, 202.5], [203.0, 203.5]]), + 'prop3': { + 'x': torch.arange(300, 304) + } + } + data2 = { + 'prop1': torch.arange(110, 114), + 'prop2': torch.tensor([[210.0, 210.5], [211.0, 211.5], [212.0, 212.5], [213.0, 213.5]]), + 'prop3': { + 'x': 
torch.arange(310, 314) + } + } + info.append(data1) + info.append(data2) + + info.parse(to='numpy') + + assert(len(info) == 3) + assert(isinstance(info["prop1"], np.ndarray)) + assert(isinstance(info["prop2"], np.ndarray)) + assert(isinstance(info["prop3_x"], np.ndarray)) + assert(info["prop1"].ndim == 2 and info["prop1"].shape[0] == 2 and info["prop1"].shape[1] == 4) + assert(info["prop2"].ndim == 3 and info["prop2"].shape[0] == 2 and info["prop2"].shape[1] == 4 and info["prop2"].shape[2] == 2) + assert(info["prop3_x"].ndim == 2 and info["prop3_x"].shape[0] == 2 and info["prop3_x"].shape[1] == 4) + + info = info.flatten() + + assert(len(info) == 3) + assert(isinstance(info["prop1"], np.ndarray)) + assert(isinstance(info["prop2"], np.ndarray)) + assert(isinstance(info["prop3_x"], np.ndarray)) + assert(info["prop1"].ndim == 1 and info["prop1"].shape[0] == 8) + assert(info["prop2"].ndim == 2 and info["prop2"].shape[0] == 8 and info["prop2"].shape[1] == 2) + assert(info["prop3_x"].ndim == 1 and info["prop3_x"].shape[0] == 8) + + assert np.array_equal(np.array([100, 110, 101, 111, 102, 112, 103, 113]), info["prop1"]) + prop2 = np.array([[200.0, 200.5], [210.0, 210.5], [201.0, 201.5], [211.0, 211.5], + [202.0, 202.5], [212.0, 212.5], [203.0, 203.5], [213.0, 213.5]]) + assert np.array_equal(prop2, info["prop2"]) + assert np.array_equal(np.array([300, 310, 301, 311, 302, 312, 303, 313]), info["prop3_x"]) + + info.parse() + + assert(len(info) == 3) + assert(isinstance(info["prop1"], np.ndarray)) + assert(isinstance(info["prop2"], np.ndarray)) + assert(isinstance(info["prop3_x"], np.ndarray)) + assert(info["prop1"].ndim == 1 and info["prop1"].shape[0] == 8) + assert(info["prop2"].ndim == 2 and info["prop2"].shape[0] == 8 and info["prop2"].shape[1] == 2) + assert(info["prop3_x"].ndim == 1 and info["prop3_x"].shape[0] == 8) + +def test_empty_dict_in_list(): + info = ExtraInfo(3, 'torch') + + data1 = { + 'prop1': 100, + 'prop2': 200 + } + data2 = {} + data3 = { + 'prop1': 102, + 'prop2': 202 + } + info.append([data1, data2, data3]) + info = info.flatten() + print(info) + assert(len(info) == 2) + + assert("prop1" in info) + assert("prop2" in info) + + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop2"])) + + assert(info["prop1"].dim() == 1 and info["prop1"].size(0) == 3) + assert(info["prop2"].dim() == 1 and info["prop2"].size(0) == 3) + + assert(info["prop1"][0] == 100 and info["prop2"][0] == 200) + assert(torch.isnan(info["prop1"][1]) and torch.isnan(info["prop2"][1])) + assert(info["prop1"][2] == 102 and info["prop2"][2] == 202) + +def test_empty_dict(): + info = ExtraInfo(2, 'numpy') + data1 = { + 'prop1': np.arange(100, 102) + } + data2 = {} + data3 = { + 'prop1': np.arange(120, 122) + } + info.append(data1) + info.append(data2) + info.append(data3) + info = info.flatten() + print(info) + + assert(len(info) == 1) + assert("prop1" in info) + assert(isinstance(info["prop1"], np.ndarray)) + assert(info["prop1"].ndim == 1 and info["prop1"].shape[0] == 6) + + assert info["prop1"][0] == 100 + assert np.isnan(info["prop1"][1]) + assert info["prop1"][2] == 120 + assert info["prop1"][3] == 101 + assert np.isnan(info["prop1"][4]) + assert info["prop1"][5] == 121 + +def test_changing_properties_dict(): + info = ExtraInfo(2, 'numpy') + data1 = { + 'prop2': np.arange(200, 202), + 'prop3': np.arange(300, 302) + } + data2 = { + 'prop2': np.arange(210, 212), + 'prop4': np.arange(410, 412) + } + data3 = { + 'prop2': np.arange(220, 222), + 'prop3': np.arange(320, 322) + } + 
info.append(data1) + info.append(data2) + info.append(data3) + info.parse(to='torch') + info = info.flatten() + + print(info) + + assert(len(info) == 3) + + assert("prop2" in info) + assert("prop3" in info) + assert("prop4" in info) + + assert(torch.is_tensor(info["prop2"])) + assert(torch.is_tensor(info["prop3"])) + assert(torch.is_tensor(info["prop4"])) + + assert(info["prop2"].dim() == 1 and info["prop2"].size(0) == 6) + assert(info["prop3"].dim() == 1 and info["prop3"].size(0) == 6) + assert(info["prop4"].dim() == 1 and info["prop4"].size(0) == 6) + + assert info["prop2"][0] == 200 and info["prop3"][0] == 300 and torch.isnan(info["prop4"][0]) + assert info["prop2"][1] == 210 and torch.isnan(info["prop3"][1]) and info["prop4"][1] == 410 + assert info["prop2"][2] == 220 and info["prop3"][2] == 320 and torch.isnan(info["prop4"][2]) + assert info["prop2"][3] == 201 and info["prop3"][3] == 301 and torch.isnan(info["prop4"][3]) + assert info["prop2"][4] == 211 and torch.isnan(info["prop3"][4]) and info["prop4"][4] == 411 + assert info["prop2"][5] == 221 and info["prop3"][5] == 321 and torch.isnan(info["prop4"][5]) + +def test_one_environment(): + info = ExtraInfo(1, 'torch') + data1 = { + 'prop1': torch.arange(100, 103), + 'prop2': torch.randn(3, 2), + 'prop3': 1 + } + data2 = { + 'prop1': torch.arange(110, 113), + 'prop2': torch.randn(3, 2), + 'prop3': 2 + } + data3 = { + 'prop1': torch.arange(120, 123), + 'prop2': torch.randn(3, 2), + 'prop3': 3 + } + info.append(data1) + info.append(data2) + info.append(data3) + info.parse('torch') + print(info) + + assert(len(info) == 3) + + assert("prop1" in info) + assert("prop2" in info) + assert("prop3" in info) + + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop2"])) + assert(torch.is_tensor(info["prop3"])) + + assert(info["prop1"].dim() == 2 and info["prop1"].size(0) == 3 and info["prop1"].size(1) == 3) + assert(info["prop2"].dim() == 3 and info["prop2"].size(0) == 3 and info["prop2"].size(1) == 3 and info["prop2"].size(2) == 2) + assert(info["prop3"].dim() == 1 and info["prop3"].size(0) == 3) + +def test_get_view_slice(): + info = ExtraInfo(3, 'torch') + data1 = { + 'prop1': torch.arange(100, 103), + 'prop3': torch.randn(3, 2) + } + data2 = { + 'prop1': torch.arange(110, 113), + 'prop3': torch.randn(3, 2) + } + + info.append(data1) + info.append(data2) + + info = info.flatten() + info = info.get_view(slice(4)) + info.parse('torch') + + assert(len(info) == 2) + + assert("prop1" in info) + assert("prop3" in info) + + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop3"])) + + assert(info["prop1"].dim() == 1 and info["prop1"].size(0) == 4) + assert(info["prop3"].dim() == 2 and info["prop3"].size(0) == 4 and info["prop3"].size(1) == 2) + + assert(info["prop1"][0] == 100) + assert(info["prop1"][1] == 110) + assert(info["prop1"][2] == 101) + assert(info["prop1"][3] == 111) + +def test_get_view_array(): + info = ExtraInfo(3, 'torch') + data1 = { + 'prop1': torch.arange(100, 103), + 'prop3': torch.randn(3, 2) + } + data2 = { + 'prop1': torch.arange(110, 113), + 'prop3': torch.randn(3, 2) + } + + info.append(data1) + info.append(data2) + + info = info.flatten() + info = info.get_view(np.array([1, 2, 5]), True) + info.parse('torch') + print(info) + + assert(len(info) == 2) + + assert("prop1" in info) + assert("prop3" in info) + + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop3"])) + + assert(info["prop1"].dim() == 1 and info["prop1"].size(0) == 3) + assert(info["prop3"].dim() ==
2 and info["prop3"].size(0) == 3 and info["prop3"].size(1) == 2) + + assert(info["prop1"][0] == 110) + assert(info["prop1"][1] == 101) + assert(info["prop1"][2] == 112) + +def test_add(): + info1 = ExtraInfo(10, 'numpy') + data1 = { + 'prop1': np.arange(100, 110), + 'prop2': np.arange(200, 210) + } + data2 = { + 'prop1': np.arange(110, 120), + 'prop2': np.arange(210, 220) + } + info1.append(data1) + info1.append(data2) + + info2 = ExtraInfo(10, 'torch') + data1 = { + 'prop1': torch.arange(100, 110, dtype=torch.float32), + 'prop3': torch.arange(300, 310, dtype=torch.float32) + } + data2 = { + 'prop1': torch.arange(110, 120), + 'prop3': torch.arange(310, 320) + } + info2.append(data1) + info2.append(data2) + + info1.parse('torch') + info2.parse('numpy') + + info = info1 + info2 + + assert(len(info) == 3) + + assert("prop1" in info) + assert("prop2" in info) + assert("prop3" in info) + + assert(torch.is_tensor(info["prop1"])) + assert(torch.is_tensor(info["prop2"])) + assert(torch.is_tensor(info["prop3"])) + + assert(info["prop1"].dim() == 2 and info["prop1"].size(0) == 4 and info["prop1"].size(1) == 10) + assert(info["prop2"].dim() == 2 and info["prop2"].size(0) == 4 and info["prop2"].size(1) == 10) + assert(info["prop3"].dim() == 2 and info["prop3"].size(0) == 4 and info["prop3"].size(1) == 10) + + for i in range(2): + for j in range(10): + assert(info["prop1"][i][j] == 100 + i*10 + j) + assert(info["prop2"][i][j] == 200 + i*10 + j) + assert(torch.isnan(info["prop3"][i][j])) + + for i in range(2): + for j in range(10): + assert(info["prop1"][2 + i][j] == 100 + i*10 + j) + assert(torch.isnan(info["prop2"][2 + i][j])) + assert(info["prop3"][2 + i][j] == 300 + i*10 + j) + +def test_clear(): + info = ExtraInfo(10, 'numpy') + data1 = { + 'prop1': np.arange(100, 110), + 'prop2': np.arange(200, 210) + } + data2 = { + 'prop1': np.arange(110, 120), + 'prop2': np.arange(210, 220) + } + info.append(data1) + info.append(data2) + info.parse() + info.clear() + assert(not info) + +def test_flatten_with_mask(): + info = ExtraInfo(5, 'numpy') + data1 = { + 'prop1': np.arange(100, 105), + 'prop2': np.arange(200, 205) + } + data2 = { + 'prop1': np.arange(110, 115), + 'prop2': np.arange(210, 215) + } + info.append(data1) + info.append(data2) + mask = np.array([True, True, False, False, False, True, False, False, True, False]) + info = info.flatten(mask) + + assert(len(info) == 2) + + assert("prop1" in info) + assert("prop2" in info) + + assert(isinstance(info["prop1"], np.ndarray)) + assert(isinstance(info["prop2"], np.ndarray)) + + assert(info["prop1"].ndim == 1 and info["prop1"].shape[0] == 4) + assert(info["prop2"].ndim == 1 and info["prop2"].shape[0] == 4) + + assert np.array_equal(np.array([100, 110, 112, 104]), info["prop1"]) + assert np.array_equal(np.array([200, 210, 212, 204]), info["prop2"]) + + # Test if mask is permanently applied + info.parse() + assert(len(info) == 2) + + assert("prop1" in info) + assert("prop2" in info) + + assert(isinstance(info["prop1"], np.ndarray)) + assert(isinstance(info["prop2"], np.ndarray)) + + assert(info["prop1"].ndim == 1 and info["prop1"].shape[0] == 4) + assert(info["prop2"].ndim == 1 and info["prop2"].shape[0] == 4) + + assert np.array_equal(np.array([100, 110, 112, 104]), info["prop1"]) + assert np.array_equal(np.array([200, 210, 212, 204]), info["prop2"])
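For readers skimming the patch, here is a minimal usage sketch of the ExtraInfo API added above, following the patterns exercised in tests/core/test_extra_info.py; the info keys ('reward_ctrl', 'pose') and their values are illustrative, not part of the patch.

import numpy as np
from mushroom_rl.core import ExtraInfo

# Collect step info from 2 parallel environments over 3 steps.
info = ExtraInfo(2, 'numpy')
for step in range(3):
    # One dict per environment; nested dicts are flattened to 'pose_x' and 'pose_y'.
    info.append([{'reward_ctrl': float(step), 'pose': {'x': 0.0, 'y': 1.0}},
                 {'reward_ctrl': float(step) + 10, 'pose': {'x': 2.0, 'y': 3.0}}])

info.parse()                             # arrays of shape (n_steps, n_envs)
assert info['reward_ctrl'].shape == (3, 2)
assert info['pose_x'].shape == (3, 2)

flat = info.flatten()                    # combines the step and env dimensions
assert flat['reward_ctrl'].shape == (6,)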
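The new flatten and pack_padded_sequence backend helpers both rely on a column-major (Fortran-order) reshape, so flattened arrays interleave values per environment rather than per step; this is why test_dict_of_torch expects [100, 110, 101, 111, ...]. A self-contained numpy sketch of that ordering, with made-up values:

import numpy as np

steps, n_envs = 2, 3
array = np.arange(steps * n_envs).reshape(steps, n_envs)  # [[0, 1, 2], [3, 4, 5]]
mask = np.array([[True, True, False],
                 [True, False, False]])                   # valid steps per environment

flat = array.reshape(steps * n_envs, order='F')           # -> [0, 3, 1, 4, 2, 5]
packed = flat[mask.flatten(order='F')]                    # -> [0, 3, 1]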
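Adding two parsed ExtraInfo objects with '+' takes the union of their keys and fills values missing on either side with NaN placeholders, as covered by test_add above. A small sketch, assuming the numpy backend and hypothetical keys 'x' and 'y':

import numpy as np
from mushroom_rl.core import ExtraInfo

a = ExtraInfo(1, 'numpy')
a.append({'x': 1.0})
a.parse()

b = ExtraInfo(1, 'numpy')
b.append({'y': 2.0})
b.parse()

c = a + b  # two steps total: one from a, one from b
assert c['x'][0] == 1.0 and np.isnan(c['x'][1])
assert np.isnan(c['y'][0]) and c['y'][1] == 2.0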