diff --git a/dynSys/@MountainCarV0/optCtrl.m b/dynSys/@MountainCarV0/optCtrl.m index a358914..b103a19 100644 --- a/dynSys/@MountainCarV0/optCtrl.m +++ b/dynSys/@MountainCarV0/optCtrl.m @@ -1,43 +1,39 @@ function uOpt = optCtrl(obj, t, xs, deriv, uMode, ~) % uOpt = optCtrl(obj, t, deriv, uMode, dMode, MIEdims) -position = xs{1}; -velocity = xs{2}; - %% Input processing if nargin < 5 uMode = 'min'; end +if ~iscell(deriv) + deriv = num2cell(deriv); +end -%% https://github.com/ZhiqingXiao/OpenAIGymSolution/blob/master/MountainCar-v0_close_form/mountaincar_v0_close_form.ipynb -% position, velocity = observation -% lb = min(-0.09 * (position + 0.25) ** 2 + 0.03, -% 0.3 * (position + 0.9) ** 4 - 0.008) -% ub = -0.07 * (position + 0.38) ** 2 + 0.07 -% if lb < velocity < ub: -% action = 2 # push right -% else: -% action = 0 # push left - -% TODO: Use deriv instead of hard-coded values above? -% TODO: not sure this is working... +%% Optimal control -lb = min(-0.09 * (position + 0.25)^2 + 0.03, ... - 0.3 * (position + 0.9)^4 - 0.008); +% https://arxiv.org/pdf/1709.07523.pdf +% Appendix +% need dot product of deriv and dx = f(x) +% then argmin terms with action to find optimal control -ub = -0.07 * (position + 0.38)^2 + 0.07; +% we have +% dx(1) = velocity; +% u ~ { 0, 1, 2 } +% dx(2) = (u - 1) * force + cos(3 * position) * (-gravity); -% action = 0; % push left -% if (lb < velocity) & (velocity < ub) -% action = 2; % push right -% end -% TODO: handle uMode (always min for Backwards Reachability with goal) +% u* = argmin_u (deriv{[0, 1]} * dx(2)) +% = argmin_u (deriv{[0, 1]} (u - 1) * force) +% = 0 when deriv{[0, 1]} >= 0, 2 when deriv{[0, 1]} < 0 %% Optimal control -actions = zeros(size(xs{1})); -actions((lb < velocity) & (velocity < ub)) = 2; -uOpt = actions; +if strcmp(uMode, 'max') + uOpt = (deriv{1} >= 0) * 2 + (deriv{1} < 0) * 0; +elseif strcmp(uMode, 'min') + uOpt = (deriv{1} >= 0) * 0 + (deriv{1} < 0) * 2; +else + error('Unknown uMode!') +end end \ No newline at end of file diff --git a/mountain_car.m b/mountain_car.m index 99e7bba..e7617b1 100644 --- a/mountain_car.m +++ b/mountain_car.m @@ -47,17 +47,14 @@ function mountain_car() min_position = -1.2; max_position = 0.6; -goal_position = 0.5; -goal_velocity = 0.0; - %% Should we compute the trajectory? -% compTraj = true; -compTraj = false; +compTraj = true; +% compTraj = false; %% Grid grid_min = [min_position; min_velocity]; % Lower corner of computation domain grid_max = [max_position; max_velocity]; % Upper corner of computation domain -N = [141; 141]; % Number of grid points per dimension +N = [41; 41]; % Number of grid points per dimension grid = createGrid(grid_min, grid_max, N); % state space dimensions @@ -69,9 +66,8 @@ function mountain_car() % data0 = shapeCylinder(grid, 3, [0; 0; 0], R); % also try shapeRectangleByCorners, shapeSphere, etc. -center = [0.55; 0]; -widths = [0.05; 0.01]; - +% center = [0.55; 0]; +% widths = [0.05; 0.01]; center = [0.55; 0]; widths = [0.15; 0.03]; @@ -81,7 +77,7 @@ function mountain_car() %% time vector t0 = 0; % changed the time from 2 seconds to 15 -tMax = 20; +tMax = 60; dt = 0.1; tau = t0:dt:tMax; @@ -102,7 +98,7 @@ function mountain_car() schemeData.grid = grid; schemeData.dynSys = mCar; % schemeData.accuracy = 'high'; -schemeData.accuracy = 'low'; +schemeData.accuracy = 'veryHigh'; schemeData.uMode = uMode; %% Compute value function @@ -139,7 +135,7 @@ function mountain_car() pause %set the initial state - xinit = [2, 2]; + xinit = [-0.5, 0]; figure(6) clf