Renamed ExponentialParameter to DecayParameter

MushroomRL · Dec 7, 2023 · 584aa86 · 584aa86
1 parent 1ae4000
commit 584aa86
Show file tree

Hide file tree

Showing 8 changed files with 118 additions and 114 deletions.
diff --git a/examples/double_chain_q_learning/double_chain.py b/examples/double_chain_q_learning/double_chain.py
@@ -8,7 +8,7 @@
 from mushroom_rl.environments import *
 from mushroom_rl.policy import EpsGreedy
 from mushroom_rl.utils.callbacks import CollectQ
-from mushroom_rl.rl_utils.parameters import Parameter, ExponentialParameter
+from mushroom_rl.rl_utils.parameters import Parameter, DecayParameter
 
 
 """
@@ -33,7 +33,7 @@ def experiment(algorithm_class, exp):
     pi = EpsGreedy(epsilon=epsilon)
 
     # Agent
-    learning_rate = ExponentialParameter(value=1., exp=exp, size=mdp.info.size)
+    learning_rate = DecayParameter(value=1., exp=exp, size=mdp.info.size)
     algorithm_params = dict(learning_rate=learning_rate)
     agent = algorithm_class(mdp.info, pi, **algorithm_params)
 

diff --git a/examples/grid_world_td.py b/examples/grid_world_td.py
@@ -11,7 +11,7 @@
 from mushroom_rl.environments import *
 from mushroom_rl.policy import EpsGreedy
 from mushroom_rl.utils.callbacks import CollectDataset, CollectMaxQ
-from mushroom_rl.rl_utils.parameters import ExponentialParameter
+from mushroom_rl.rl_utils.parameters import DecayParameter
 
 
 """
@@ -31,11 +31,11 @@ def experiment(algorithm_class, exp):
     mdp = GridWorldVanHasselt()
 
     # Policy
-    epsilon = ExponentialParameter(value=1, exp=.5, size=mdp.info.observation_space.size)
+    epsilon = DecayParameter(value=1, exp=.5, size=mdp.info.observation_space.size)
     pi = EpsGreedy(epsilon=epsilon)
 
     # Agent
-    learning_rate = ExponentialParameter(value=1, exp=exp, size=mdp.info.size)
+    learning_rate = DecayParameter(value=1, exp=exp, size=mdp.info.size)
     algorithm_params = dict(learning_rate=learning_rate)
     agent = algorithm_class(mdp.info, pi, **algorithm_params)
 

diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py
@@ -18,45 +18,34 @@ def __init__(self, xml_file, actuation_spec, observation_spec, gamma, horizon, t
         Constructor.
 
         Args:
-             xml_file (str/xml handle): A string with a path to the xml or an Mujoco xml handle.
-             actuation_spec (list): A list specifying the names of the joints
-                which should be controllable by the agent. Can be left empty
-                when all actuators should be used;
-             observation_spec (list): A list containing the names of data that
-                should be made available to the agent as an observation and
-                their type (ObservationType). They are combined with a key,
-                which is used to access the data. An entry in the list
-                is given by: (key, name, type). The name can later be used
-                to retrieve specific observations;
-             gamma (float): The discounting factor of the environment;
-             horizon (int): The maximum horizon for the environment;
-             timestep (float): The timestep used by the MuJoCo
-                simulator. If None, the default timestep specified in the XML will be used;
-             n_substeps (int, 1): The number of substeps to use by the MuJoCo
-                simulator. An action given by the agent will be applied for
-                n_substeps before the agent receives the next observation and
-                can act accordingly;
-             n_intermediate_steps (int, 1): The number of steps between every action
-                taken by the agent. Similar to n_substeps but allows the user
-                to modify, control and access intermediate states.
-             additional_data_spec (list, None): A list containing the data fields of
-                interest, which should be read from or written to during
-                simulation. The entries are given as the following tuples:
-                (key, name, type) key is a string for later referencing in the
-                "read_data" and "write_data" methods. The name is the name of
-                the object in the XML specification and the type is the
-                ObservationType;
-             collision_groups (list, None): A list containing groups of geoms for
-                which collisions should be checked during simulation via
-                ``check_collision``. The entries are given as:
-                ``(key, geom_names)``, where key is a string for later
-                referencing in the "check_collision" method, and geom_names is
-                a list of geom names in the XML specification.
-             max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
-                The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The
-                velocity will not be limited in mujoco
-             **viewer_params: other parameters to be passed to the viewer.
-                See MujocoViewer documentation for the available options.
+            xml_file (str/xml handle): A string with a path to the xml or an Mujoco xml handle.
+            actuation_spec (list): A list specifying the names of the joints which should be controllable by the
+               agent. Can be left empty when all actuators should be used;
+            observation_spec (list): A list containing the names of data that should be made available to the agent as
+               an observation and their type (ObservationType). They are combined with a key, which is used to access
+               the data. An entry in the list is given by: (key, name, type). The name can later be used to retrieve
+               specific observations;
+            gamma (float): The discounting factor of the environment;
+            horizon (int): The maximum horizon for the environment;
+            timestep (float): The timestep used by the MuJoCo simulator. If None, the default timestep specified in
+               the XML will be used;
+            n_substeps (int, 1): The number of substeps to use by the MuJoCo simulator. An action given by the agent
+               will be applied for n_substeps before the agent receives the next observation and can act accordingly;
+            n_intermediate_steps (int, 1): The number of steps between every action taken by the agent. Similar to
+               n_substeps but allows the user to modify, control and access intermediate states.
+            additional_data_spec (list, None): A list containing the data fields of interest, which should be read from
+               or written to during simulation. The entries are given as the following tuples: (key, name, type) key
+               is a string for later referencing in the "read_data" and "write_data" methods. The name is the name of
+               the object in the XML specification and the type is the ObservationType;
+            collision_groups (list, None): A list containing groups of geoms for which collisions should be checked
+               during simulation via ``check_collision``. The entries are given as: ``(key, geom_names)``, where key is
+               a string for later referencing in the "check_collision" method, and geom_names is a list of geom names
+               in the XML specification.
+            max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
+               The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The
+               velocity will not be limited in mujoco
+            **viewer_params: other parameters to be passed to the viewer.
+               See MujocoViewer documentation for the available options.
 
         """
         # Create the simulation
@@ -462,9 +451,8 @@ def get_action_indices(model, data, actuation_spec):
         Args:
             model: MuJoCo model.
             data: MuJoCo data structure.
-             actuation_spec (list): A list specifying the names of the joints
-                which should be controllable by the agent. Can be left empty
-                when all actuators should be used;
+            actuation_spec (list): A list specifying the names of the joints which should be controllable by the agent.
+                Can be left empty when all actuators should be used;
 
         Returns:
             A list of actuator indices.
@@ -561,45 +549,34 @@ def __init__(self, xml_files, actuation_spec, observation_spec, gamma, horizon,
         Constructor.
 
         Args:
-             xml_files (str/xml handle): A list containing strings with a path to the xml or Mujoco xml handles;
-             actuation_spec (list): A list specifying the names of the joints
-                which should be controllable by the agent. Can be left empty
-                when all actuators should be used;
-             observation_spec (list): A list containing the names of data that
-                should be made available to the agent as an observation and
-                their type (ObservationType). They are combined with a key,
-                which is used to access the data. An entry in the list
-                is given by: (key, name, type);
-             gamma (float): The discounting factor of the environment;
-             horizon (int): The maximum horizon for the environment;
-             timestep (float): The timestep used by the MuJoCo
-                simulator. If None, the default timestep specified in the XML will be used;
-             n_substeps (int, 1): The number of substeps to use by the MuJoCo
-                simulator. An action given by the agent will be applied for
-                n_substeps before the agent receives the next observation and
-                can act accordingly;
-             n_intermediate_steps (int, 1): The number of steps between every action
-                taken by the agent. Similar to n_substeps but allows the user
-                to modify, control and access intermediate states.
-             additional_data_spec (list, None): A list containing the data fields of
-                interest, which should be read from or written to during
-                simulation. The entries are given as the following tuples:
-                (key, name, type) key is a string for later referencing in the
-                "read_data" and "write_data" methods. The name is the name of
-                the object in the XML specification and the type is the
-                ObservationType;
-             collision_groups (list, None): A list containing groups of geoms for
-                which collisions should be checked during simulation via
-                ``check_collision``. The entries are given as:
-                ``(key, geom_names)``, where key is a string for later
-                referencing in the "check_collision" method, and geom_names is
-                a list of geom names in the XML specification.
-             max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
+            xml_files (str/xml handle): A list containing strings with a path to the xml or Mujoco xml handles;
+            actuation_spec (list): A list specifying the names of the joints which should be controllable by the
+                agent. Can be left empty when all actuators should be used;
+            observation_spec (list): A list containing the names of data that should be made available to the agent as
+                an observation and their type (ObservationType). They are combined with a key, which is used to access
+                the data. An entry in the list is given by: (key, name, type);
+            gamma (float): The discounting factor of the environment;
+            horizon (int): The maximum horizon for the environment;
+            timestep (float): The timestep used by the MuJoCo simulator. If None, the default timestep specified in the
+                XML will be used;
+            n_substeps (int, 1): The number of substeps to use by the MuJoCo simulator. An action given by the agent
+                will be applied for n_substeps before the agent receives the next observation and can act accordingly;
+            n_intermediate_steps (int, 1): The number of steps between every action taken by the agent. Similar to
+                n_substeps but allows the user to modify, control and access intermediate states.
+            additional_data_spec (list, None): A list containing the data fields of interest, which should be read
+                from or written to during simulation. The entries are given as the following tuples: (key, name, type)
+                key is a string for later referencing in the "read_data" and "write_data" methods. The name is the name
+                of the object in the XML specification and the type is the ObservationType;
+            collision_groups (list, None): A list containing groups of geoms for which collisions should be checked
+                during simulation via ``check_collision``. The entries are given as: ``(key, geom_names)``, where key is
+                a string for later referencing in the "check_collision" method, and geom_names is a list of geom names
+                in the XML specification.
+            max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
                 The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The
                 velocity will not be limited in mujoco.
             random_env_reset (bool): If True, a random environment/model is chosen after each episode. If False, it is
-                sequentially iterated through the environment/model list.
-             **viewer_params: other parameters to be passed to the viewer.
+                sequentially iterated through the environment/model list.
+            **viewer_params: other parameters to be passed to the viewer.
                 See MujocoViewer documentation for the available options.
 
         """

diff --git a/mushroom_rl/features/tensors/random_fourier_tensor.py b/mushroom_rl/features/tensors/random_fourier_tensor.py
@@ -9,30 +9,28 @@
 
 class RandomFourierBasis(nn.Module):
     r"""
-    Class implementing Random Fourier basis functions. The value of the feature
-    is computed using the formula:
+    Class implementing Random Fourier basis functions. The value of the feature is computed using the formula:
 
     .. math::
         \sin{\dfrac{PX}{\nu}+\varphi}
 
-    where X is the input, m is the vector of the minumum input values (for each
-    dimensions) , \Delta is the vector of maximum
 
-    This features have been presented in:
+    where X is the input, m is the vector of the minimum input values (for each dimension), :math:`\Delta` is the
+    vector of maximum variations of the input variables, i.e. delta = high - low;
 
-    "Towards generalization and simplicity in continuous control". Rajeswaran A. et Al..
-    2017.
+    These features have been presented in:
+
+    "Towards generalization and simplicity in continuous control". Rajeswaran A. et al., 2017.
 
     """
     def __init__(self, P, phi, nu):
         r"""
         Constructor.
 
         Args:
             P (np.ndarray): weights matrix, every weight should be drawn from a normal distribution;
             phi (np.ndarray): bias vector, every weight should be drawn from a uniform distribution in the interval
-                [-\pi, \pi);
-             values of the input variables, i.e. delta = high - low;
+                :math:`[-\pi, \pi)`;
             nu (float):  bandwidth parameter, it should be chosen approximately as the average pairwise distances
                 between different observation vectors.
 

diff --git a/mushroom_rl/rl_utils/__init__.py b/mushroom_rl/rl_utils/__init__.py
@@ -1,6 +1,6 @@
 from .eligibility_trace import EligibilityTrace, ReplacingTrace, AccumulatingTrace
 from .optimizers import Optimizer, AdamOptimizer, SGDOptimizer, AdaptiveOptimizer
-from .parameters import Parameter, ExponentialParameter, LinearParameter, to_parameter
+from .parameters import Parameter, DecayParameter, LinearParameter, to_parameter
 from .preprocessors import StandardizationPreprocessor, MinMaxPreprocessor
 from .replay_memory import ReplayMemory, PrioritizedReplayMemory
 from .running_stats import RunningStandardization, RunningAveragedWindow, RunningExpWeightedAverage

diff --git a/mushroom_rl/rl_utils/parameters.py b/mushroom_rl/rl_utils/parameters.py
@@ -24,13 +24,10 @@ def __init__(self, value, min_value=None, max_value=None, size=(1,)):
 
         Args:
             value (float): initial value of the parameter;
-            min_value (float, None): minimum value that the parameter can reach
-                when decreasing;
-            max_value (float, None): maximum value that the parameter can reach
-                when increasing;
-            size (tuple, (1,)): shape of the matrix of parameters; this shape
-                can be used to have a single parameter for each state or
-                state-action tuple.
+            min_value (float, None): minimum value that the parameter can reach when decreasing;
+            max_value (float, None): maximum value that the parameter can reach when increasing;
+            size (tuple, (1,)): shape of the matrix of parameters; this shape can be used to have a single parameter for
+                each state or state-action tuple.
 
         """
         self._initial_value = value
@@ -94,8 +91,7 @@ def update(self, *idx, **kwargs):
         Updates the number of visits of the parameter in the provided index.
 
         Args:
-            *idx (list): index of the parameter whose number of visits has to be
-                updated.
+            *idx (list): index of the parameter whose number of visits has to be updated.
 
         """
         self._n_updates[idx] += 1
@@ -121,11 +117,28 @@ def initial_value(self):
 
 class LinearParameter(Parameter):
-    """
-    This class implements a linearly changing parameter according to the number
-    of times it has been used.
+    r"""
+    This class implements a linearly changing parameter according to the number of times it has been used.
+    The parameter changes following the formula:
+
+    .. math::
+        v_t = \textrm{clip}(v_0 + \dfrac{v_{th} - v_0}{n} t, v_{th})
+
+    where :math:`v_0` is the initial value, :math:`t` is the number of updates, :math:`n` is the number of steps needed
+    to reach :math:`v_{th}`, the upper or lower threshold for the parameter.
 
     """
     def __init__(self, value, threshold_value, n, size=(1,)):
+        """
+        Constructor.
+
+        Args:
+            value (float): initial value of the parameter;
+            threshold_value (float, None): minimum or maximum value that the parameter can reach;
+            n (int): number of time steps needed to reach the threshold value;
+            size (tuple, (1,)): shape of the matrix of parameters; this shape can be used to have a single parameter for
+                each state or state-action tuple.
+
+        """
         self._coeff = (threshold_value - value) / n
 
         if self._coeff >= 0:
@@ -139,14 +152,30 @@ def _compute(self, *idx, **kwargs):
         return self._coeff * self._n_updates[idx] + self._initial_value
 
 
-class ExponentialParameter(Parameter):
+class DecayParameter(Parameter):
-    """
-    This class implements a exponentially changing parameter according to the
-    number of times it has been used.
+    r"""
+    This class implements a decaying parameter. The decay follows the formula:
+
+    .. math::
+        v_n = \dfrac{v_0}{n^p}
+
+    where :math:`v_0` is the initial value of the parameter, :math:`n` is the number of steps and :math:`p` is an
+    arbitrary exponent.
 
     """
-    def __init__(self, value, exp=1., min_value=None, max_value=None,
-                 size=(1,)):
+    def __init__(self, value, exp=1., min_value=None, max_value=None, size=(1,)):
+        """
+        Constructor.
+
+        Args:
+            value (float): initial value of the parameter;
+            exp (float, 1.): exponent for the step decay;
+            min_value (float, None): minimum value that the parameter can reach when decreasing;
+            max_value (float, None): maximum value that the parameter can reach when increasing;
+            size (tuple, (1,)): shape of the matrix of parameters; this shape can be used to have a single parameter for
+                each state or state-action tuple.
+
+        """
         self._exp = exp
 
         super().__init__(value, min_value, max_value, size)

diff --git a/mushroom_rl/utils/record.py b/mushroom_rl/utils/record.py
@@ -1,4 +1,3 @@
-import os
 import cv2
 import datetime
 from pathlib import Path
@@ -7,6 +6,7 @@
 class VideoRecorder(object):
     """
     Simple video recorder that creates a video from a stream of images.
+
     """
 
     def __init__(self, path="./mushroom_rl_recordings", tag=None, video_name=None, fps=60):