Skip to content

Commit

Permalink
Renamed Exponential Parameter in Decay Parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
boris-il-forte committed Dec 7, 2023
1 parent 1ae4000 commit 584aa86
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 114 deletions.
4 changes: 2 additions & 2 deletions examples/double_chain_q_learning/double_chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from mushroom_rl.environments import *
from mushroom_rl.policy import EpsGreedy
from mushroom_rl.utils.callbacks import CollectQ
from mushroom_rl.rl_utils.parameters import Parameter, ExponentialParameter
from mushroom_rl.rl_utils.parameters import Parameter, DecayParameter


"""
Expand All @@ -33,7 +33,7 @@ def experiment(algorithm_class, exp):
pi = EpsGreedy(epsilon=epsilon)

# Agent
learning_rate = ExponentialParameter(value=1., exp=exp, size=mdp.info.size)
learning_rate = DecayParameter(value=1., exp=exp, size=mdp.info.size)
algorithm_params = dict(learning_rate=learning_rate)
agent = algorithm_class(mdp.info, pi, **algorithm_params)

Expand Down
6 changes: 3 additions & 3 deletions examples/grid_world_td.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from mushroom_rl.environments import *
from mushroom_rl.policy import EpsGreedy
from mushroom_rl.utils.callbacks import CollectDataset, CollectMaxQ
from mushroom_rl.rl_utils.parameters import ExponentialParameter
from mushroom_rl.rl_utils.parameters import DecayParameter


"""
Expand All @@ -31,11 +31,11 @@ def experiment(algorithm_class, exp):
mdp = GridWorldVanHasselt()

# Policy
epsilon = ExponentialParameter(value=1, exp=.5, size=mdp.info.observation_space.size)
epsilon = DecayParameter(value=1, exp=.5, size=mdp.info.observation_space.size)
pi = EpsGreedy(epsilon=epsilon)

# Agent
learning_rate = ExponentialParameter(value=1, exp=exp, size=mdp.info.size)
learning_rate = DecayParameter(value=1, exp=exp, size=mdp.info.size)
algorithm_params = dict(learning_rate=learning_rate)
agent = algorithm_class(mdp.info, pi, **algorithm_params)

Expand Down
133 changes: 55 additions & 78 deletions mushroom_rl/environments/mujoco.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,45 +18,34 @@ def __init__(self, xml_file, actuation_spec, observation_spec, gamma, horizon, t
Constructor.
Args:
xml_file (str/xml handle): A string with a path to the xml or an Mujoco xml handle.
actuation_spec (list): A list specifying the names of the joints
which should be controllable by the agent. Can be left empty
when all actuators should be used;
observation_spec (list): A list containing the names of data that
should be made available to the agent as an observation and
their type (ObservationType). They are combined with a key,
which is used to access the data. An entry in the list
is given by: (key, name, type). The name can later be used
to retrieve specific observations;
gamma (float): The discounting factor of the environment;
horizon (int): The maximum horizon for the environment;
timestep (float): The timestep used by the MuJoCo
simulator. If None, the default timestep specified in the XML will be used;
n_substeps (int, 1): The number of substeps to use by the MuJoCo
simulator. An action given by the agent will be applied for
n_substeps before the agent receives the next observation and
can act accordingly;
n_intermediate_steps (int, 1): The number of steps between every action
taken by the agent. Similar to n_substeps but allows the user
to modify, control and access intermediate states.
additional_data_spec (list, None): A list containing the data fields of
interest, which should be read from or written to during
simulation. The entries are given as the following tuples:
(key, name, type) key is a string for later referencing in the
"read_data" and "write_data" methods. The name is the name of
the object in the XML specification and the type is the
ObservationType;
collision_groups (list, None): A list containing groups of geoms for
which collisions should be checked during simulation via
``check_collision``. The entries are given as:
``(key, geom_names)``, where key is a string for later
referencing in the "check_collision" method, and geom_names is
a list of geom names in the XML specification.
max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The
velocity will not be limited in mujoco
**viewer_params: other parameters to be passed to the viewer.
See MujocoViewer documentation for the available options.
xml_file (str/xml handle): A string with a path to the xml or a MuJoCo xml handle.
actuation_spec (list): A list specifying the names of the joints which should be controllable by the
agent. Can be left empty when all actuators should be used;
observation_spec (list): A list containing the names of data that should be made available to the agent as
an observation and their type (ObservationType). They are combined with a key, which is used to access
the data. An entry in the list is given by: (key, name, type). The name can later be used to retrieve
specific observations;
gamma (float): The discounting factor of the environment;
horizon (int): The maximum horizon for the environment;
timestep (float): The timestep used by the MuJoCo simulator. If None, the default timestep specified in
the XML will be used;
n_substeps (int, 1): The number of substeps to use by the MuJoCo simulator. An action given by the agent
will be applied for n_substeps before the agent receives the next observation and can act accordingly;
n_intermediate_steps (int, 1): The number of steps between every action taken by the agent. Similar to
n_substeps but allows the user to modify, control and access intermediate states.
additional_data_spec (list, None): A list containing the data fields of interest, which should be read from
or written to during simulation. The entries are given as the following tuples: (key, name, type) key
is a string for later referencing in the "read_data" and "write_data" methods. The name is the name of
the object in the XML specification and the type is the ObservationType;
collision_groups (list, None): A list containing groups of geoms for which collisions should be checked
during simulation via ``check_collision``. The entries are given as: ``(key, geom_names)``, where key is
a string for later referencing in the "check_collision" method, and geom_names is a list of geom names
in the XML specification.
max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The
velocity will not be limited in mujoco
**viewer_params: other parameters to be passed to the viewer.
See MujocoViewer documentation for the available options.
"""
# Create the simulation
Expand Down Expand Up @@ -462,9 +451,8 @@ def get_action_indices(model, data, actuation_spec):
Args:
model: MuJoCo model.
data: MuJoCo data structure.
actuation_spec (list): A list specifying the names of the joints
which should be controllable by the agent. Can be left empty
when all actuators should be used;
actuation_spec (list): A list specifying the names of the joints which should be controllable by the agent.
Can be left empty when all actuators should be used;
Returns:
A list of actuator indices.
Expand Down Expand Up @@ -561,45 +549,34 @@ def __init__(self, xml_files, actuation_spec, observation_spec, gamma, horizon,
Constructor.
Args:
xml_files (str/xml handle): A list containing strings with a path to the xml or Mujoco xml handles;
actuation_spec (list): A list specifying the names of the joints
which should be controllable by the agent. Can be left empty
when all actuators should be used;
observation_spec (list): A list containing the names of data that
should be made available to the agent as an observation and
their type (ObservationType). They are combined with a key,
which is used to access the data. An entry in the list
is given by: (key, name, type);
gamma (float): The discounting factor of the environment;
horizon (int): The maximum horizon for the environment;
timestep (float): The timestep used by the MuJoCo
simulator. If None, the default timestep specified in the XML will be used;
n_substeps (int, 1): The number of substeps to use by the MuJoCo
simulator. An action given by the agent will be applied for
n_substeps before the agent receives the next observation and
can act accordingly;
n_intermediate_steps (int, 1): The number of steps between every action
taken by the agent. Similar to n_substeps but allows the user
to modify, control and access intermediate states.
additional_data_spec (list, None): A list containing the data fields of
interest, which should be read from or written to during
simulation. The entries are given as the following tuples:
(key, name, type) key is a string for later referencing in the
"read_data" and "write_data" methods. The name is the name of
the object in the XML specification and the type is the
ObservationType;
collision_groups (list, None): A list containing groups of geoms for
which collisions should be checked during simulation via
``check_collision``. The entries are given as:
``(key, geom_names)``, where key is a string for later
referencing in the "check_collision" method, and geom_names is
a list of geom names in the XML specification.
max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
xml_files (list): A list containing strings with a path to the xml or MuJoCo xml handles;
actuation_spec (list): A list specifying the names of the joints which should be controllable by the
agent. Can be left empty when all actuators should be used;
observation_spec (list): A list containing the names of data that should be made available to the agent as
an observation and their type (ObservationType). They are combined with a key, which is used to access
the data. An entry in the list is given by: (key, name, type);
gamma (float): The discounting factor of the environment;
horizon (int): The maximum horizon for the environment;
timestep (float): The timestep used by the MuJoCo simulator. If None, the default timestep specified in the
XML will be used;
n_substeps (int, 1): The number of substeps to use by the MuJoCo simulator. An action given by the agent
will be applied for n_substeps before the agent receives the next observation and can act accordingly;
n_intermediate_steps (int, 1): The number of steps between every action taken by the agent. Similar to
n_substeps but allows the user to modify, control and access intermediate states.
additional_data_spec (list, None): A list containing the data fields of interest, which should be read
from or written to during simulation. The entries are given as the following tuples: (key, name, type)
key is a string for later referencing in the "read_data" and "write_data" methods. The name is the name
of the object in the XML specification and the type is the ObservationType;
collision_groups (list, None): A list containing groups of geoms for which collisions should be checked
during simulation via ``check_collision``. The entries are given as: ``(key, geom_names)``, where key is
a string for later referencing in the "check_collision" method, and geom_names is a list of geom names
in the XML specification.
max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info.
The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The
velocity will not be limited in mujoco.
random_env_reset (bool): If True, a random environment/model is chosen after each episode. If False, it is
sequentially iterated through the environment/model list.
**viewer_params: other parameters to be passed to the viewer.
sequentially iterated through the environment/model list.
**viewer_params: other parameters to be passed to the viewer.
See MujocoViewer documentation for the available options.
"""
Expand Down
18 changes: 8 additions & 10 deletions mushroom_rl/features/tensors/random_fourier_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,28 @@

class RandomFourierBasis(nn.Module):
r"""
Class implementing Random Fourier basis functions. The value of the feature
is computed using the formula:
Class implementing Random Fourier basis functions. The value of the feature is computed using the formula:
.. math::
\sin\left(\dfrac{PX}{\nu}+\varphi\right)
where X is the input, m is the vector of the minumum input values (for each
dimensions) , \Delta is the vector of maximum
This features have been presented in:
where X is the input, P is the weights matrix, :math:`\nu` is the bandwidth parameter and :math:`\varphi` is the
bias vector.
"Towards generalization and simplicity in continuous control". Rajeswaran A. et Al..
2017.
These features have been presented in:
"Towards generalization and simplicity in continuous control". Rajeswaran A. et al., 2017.
"""
def __init__(self, P, phi, nu):
r"""
"""
Constructor.
Args:
P (np.ndarray): weights matrix, every weight should be drawn from a normal distribution;
phi (np.ndarray): bias vector, every weight should be drawn from a uniform distribution in the interval
[-\pi, \pi);
values of the input variables, i.e. delta = high - low;
:math:`[-\pi, \pi)`;
nu (float): bandwidth parameter, it should be chosen approximately as the average pairwise distances
between different observation vectors.
Expand Down
2 changes: 1 addition & 1 deletion mushroom_rl/rl_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .eligibility_trace import EligibilityTrace, ReplacingTrace, AccumulatingTrace
from .optimizers import Optimizer, AdamOptimizer, SGDOptimizer, AdaptiveOptimizer
from .parameters import Parameter, ExponentialParameter, LinearParameter, to_parameter
from .parameters import Parameter, DecayParameter, LinearParameter, to_parameter
from .preprocessors import StandardizationPreprocessor, MinMaxPreprocessor
from .replay_memory import ReplayMemory, PrioritizedReplayMemory
from .running_stats import RunningStandardization, RunningAveragedWindow, RunningExpWeightedAverage
Expand Down
61 changes: 45 additions & 16 deletions mushroom_rl/rl_utils/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ def __init__(self, value, min_value=None, max_value=None, size=(1,)):
Args:
value (float): initial value of the parameter;
min_value (float, None): minimum value that the parameter can reach
when decreasing;
max_value (float, None): maximum value that the parameter can reach
when increasing;
size (tuple, (1,)): shape of the matrix of parameters; this shape
can be used to have a single parameter for each state or
state-action tuple.
min_value (float, None): minimum value that the parameter can reach when decreasing;
max_value (float, None): maximum value that the parameter can reach when increasing;
size (tuple, (1,)): shape of the matrix of parameters; this shape can be used to have a single parameter for
each state or state-action tuple.
"""
self._initial_value = value
Expand Down Expand Up @@ -94,8 +91,7 @@ def update(self, *idx, **kwargs):
Updates the number of visits of the parameter at the provided index.
Args:
*idx (list): index of the parameter whose number of visits has to be
updated.
*idx (list): index of the parameter whose number of visits has to be updated.
"""
self._n_updates[idx] += 1
Expand All @@ -121,11 +117,28 @@ def initial_value(self):

class LinearParameter(Parameter):
r"""
This class implements a linearly changing parameter according to the number
of times it has been used.
This class implements a linearly changing parameter according to the number of times it has been used.
The parameter changes following the formula:
.. math::
v_t = \textrm{clip}\left(v_0 + \dfrac{v_{th} - v_0}{n} t, v_{th}\right)
where :math:`v_0` is the initial value of the parameter, :math:`t` is the number of updates performed so far,
:math:`n` is the number of steps needed to reach the threshold and :math:`v_{th}` is the upper or lower threshold
for the parameter.
"""
def __init__(self, value, threshold_value, n, size=(1,)):
"""
Constructor.
Args:
value (float): initial value of the parameter;
threshold_value (float, None): minimum or maximum value that the parameter can reach;
n (int): number of time steps needed to reach the threshold value;
size (tuple, (1,)): shape of the matrix of parameters; this shape can be used to have a single parameter for
each state or state-action tuple.
"""
self._coeff = (threshold_value - value) / n

if self._coeff >= 0:
Expand All @@ -139,14 +152,30 @@ def _compute(self, *idx, **kwargs):
return self._coeff * self._n_updates[idx] + self._initial_value


class ExponentialParameter(Parameter):
class DecayParameter(Parameter):
r"""
This class implements a exponentially changing parameter according to the
number of times it has been used.
This class implements a decaying parameter. The decay follows the formula:
.. math::
v_n = \dfrac{v_0}{n^p}
where :math:`v_0` is the initial value of the parameter, :math:`n` is the number of steps and :math:`p` is an
arbitrary exponent.
"""
def __init__(self, value, exp=1., min_value=None, max_value=None,
size=(1,)):
def __init__(self, value, exp=1., min_value=None, max_value=None, size=(1,)):
"""
Constructor.
Args:
value (float): initial value of the parameter;
exp (float, 1.): exponent for the step decay;
min_value (float, None): minimum value that the parameter can reach when decreasing;
max_value (float, None): maximum value that the parameter can reach when increasing;
size (tuple, (1,)): shape of the matrix of parameters; this shape can be used to have a single parameter for
each state or state-action tuple.
"""
self._exp = exp

super().__init__(value, min_value, max_value, size)
Expand Down
2 changes: 1 addition & 1 deletion mushroom_rl/utils/record.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import cv2
import datetime
from pathlib import Path
Expand All @@ -7,6 +6,7 @@
class VideoRecorder(object):
"""
Simple video recorder that creates a video from a stream of images.
"""

def __init__(self, path="./mushroom_rl_recordings", tag=None, video_name=None, fps=60):
Expand Down
Loading

0 comments on commit 584aa86

Please sign in to comment.