diff --git a/docs/source/mushroom_rl.rl_utils.rst b/docs/source/mushroom_rl.rl_utils.rst index 262bacfe..4a9be33f 100644 --- a/docs/source/mushroom_rl.rl_utils.rst +++ b/docs/source/mushroom_rl.rl_utils.rst @@ -1,3 +1,6 @@ +Reinforcement Learning utils +============================ + Eligibility trace ----------------- diff --git a/mushroom_rl/features/basis/fourier.py b/mushroom_rl/features/basis/fourier.py index 6a85bfb2..e96b7033 100644 --- a/mushroom_rl/features/basis/fourier.py +++ b/mushroom_rl/features/basis/fourier.py @@ -9,8 +9,8 @@ class FourierBasis: .. math:: \sum \cos{\pi(X - m)/\Delta c} - where X is the input, m is the vector of the minumum input values (for each - dimensions) , \Delta is the vector of maximum + where :math:`X` is the input, m is the vector of the minimum input values (for each dimension), :math:`\Delta` is + the vector of differences between maximum and minimum values for the variables. """ def __init__(self, low, delta, c, dimensions=None): @@ -19,11 +19,10 @@ def __init__(self, low, delta, c, dimensions=None): Args: low (np.ndarray): vector of minimum values of the input variables; - delta (np.ndarray): vector of the maximum difference between two - values of the input variables, i.e. delta = high - low; + delta (np.ndarray): vector of the maximum difference between two values of the input variables, i.e. + delta = high - low; c (np.ndarray): vector of weights for the state variables; - dimensions (list, None): list of the dimensions of the input to be - considered by the feature. + dimensions (list, None): list of the dimensions of the input to be considered by the feature. """ self._low = low diff --git a/mushroom_rl/features/basis/gaussian_rbf.py b/mushroom_rl/features/basis/gaussian_rbf.py index 1389a1e7..1eadfa19 100644 --- a/mushroom_rl/features/basis/gaussian_rbf.py +++ b/mushroom_rl/features/basis/gaussian_rbf.py @@ -9,7 +9,7 @@ class GaussianRBF: .. 
math:: \sum \dfrac{(X_i - \mu_i)^2}{\sigma_i} - where X is the input, \mu is the mean vector and \sigma is the scale parameter vector. + where :math:`X` is the input, :math:`\mu` is the mean vector and :math:`\sigma` is the scale parameter vector. """ def __init__(self, mean, scale, dimensions=None): diff --git a/mushroom_rl/features/basis/polynomial.py b/mushroom_rl/features/basis/polynomial.py index 3483dce4..7e37b970 100644 --- a/mushroom_rl/features/basis/polynomial.py +++ b/mushroom_rl/features/basis/polynomial.py @@ -9,7 +9,7 @@ class PolynomialBasis: .. math:: \prod X_i^{d_i} - where X is the input and d is the vector of the exponents of the polynomial. + where :math:`X` is the input and :math:`d` is the vector of the exponents of the polynomial. """ def __init__(self, dimensions=None, degrees=None, low=None, high=None): diff --git a/mushroom_rl/features/tensors/random_fourier_tensor.py b/mushroom_rl/features/tensors/random_fourier_tensor.py index 6e911e98..a356f40c 100644 --- a/mushroom_rl/features/tensors/random_fourier_tensor.py +++ b/mushroom_rl/features/tensors/random_fourier_tensor.py @@ -15,8 +15,8 @@ class RandomFourierBasis(nn.Module): \sin{\dfrac{PX}{\nu}+\varphi} - where X is the input, m is the vector of the minimum input values (for each dimensions), :math: `\Delta` is the - vector of maximum variations of the input variables, i.e. delta = high - low; + where :math:`X` is the input, :math:`P` is a random weights matrix, :math:`\nu` is the bandwidth parameter and + :math:`\varphi` is a bias vector. These features have been presented in: diff --git a/mushroom_rl/rl_utils/parameters.py b/mushroom_rl/rl_utils/parameters.py index 1f9d7899..5dc28ee0 100644 --- a/mushroom_rl/rl_utils/parameters.py +++ b/mushroom_rl/rl_utils/parameters.py @@ -116,14 +116,14 @@ def initial_value(self): class LinearParameter(Parameter): - """ + r""" This class implements a linearly changing parameter according to the number of times it has been used. 
The parameter changes following the formula: .. math:: - v_n = \textrm{clip}(v_0 + \dfrac{v_{th} - v_0}}{n}, v_{th}) + v_n = \textrm{clip}(v_0 + \dfrac{v_{th} - v_0}{n}, v_{th}) - where :math: `v_0` is the initial value of the parameter, :math: `n` is the number of steps and :math: `v_{th}` is + where :math:`v_0` is the initial value of the parameter, :math:`n` is the number of steps and :math:`v_{th}` is the upper or lower threshold for the parameter. """ @@ -153,13 +153,13 @@ def _compute(self, *idx, **kwargs): class DecayParameter(Parameter): - """ + r""" This class implements a decaying parameter. The decay follows the formula: .. math:: v_n = \dfrac{v_0}{n^p} - where :math: `v_0` is the initial value of the parameter, :math: `n` is the number of steps and :math: `p` is an + where :math:`v_0` is the initial value of the parameter, :math:`n` is the number of steps and :math:`p` is an arbitrary exponent. """