fix optCtrl using tutorial in paper appendix

HJReachability · Aug 9, 2021 · cf62bee · cf62bee
1 parent af20dc6
commit cf62bee
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 38 deletions.
diff --git a/dynSys/@MountainCarV0/optCtrl.m b/dynSys/@MountainCarV0/optCtrl.m
@@ -1,43 +1,39 @@
 function uOpt = optCtrl(obj, t, xs, deriv, uMode, ~)
 % uOpt = optCtrl(obj, t, deriv, uMode, dMode, MIEdims)
 
-position = xs{1};
-velocity = xs{2};
-
 %% Input processing
 if nargin < 5
   uMode = 'min';
 end
 
+if ~iscell(deriv)
+  deriv = num2cell(deriv);
+end
 
-%% https://github.com/ZhiqingXiao/OpenAIGymSolution/blob/master/MountainCar-v0_close_form/mountaincar_v0_close_form.ipynb 
-%         position, velocity = observation
-%         lb = min(-0.09 * (position + 0.25) ** 2 + 0.03,
-%                 0.3 * (position + 0.9) ** 4 - 0.008)
-%         ub = -0.07 * (position + 0.38) ** 2 + 0.07
-%         if lb < velocity < ub:
-%             action = 2 # push right
-%         else:
-%             action = 0 # push left
-
-% TODO: Use deriv instead of hard-coded values above?
-% TODO: not sure this is working... 
 
+%% Optimal control
 
-lb = min(-0.09 * (position + 0.25)^2 + 0.03, ...
-       0.3 * (position + 0.9)^4 - 0.008);
+% https://arxiv.org/pdf/1709.07523.pdf
+% Appendix 
+% need dot product of deriv and dx = f(x)
+% then argmin terms with action to find optimal control
 
-ub = -0.07 * (position + 0.38)^2 + 0.07;
+% we have    
+%   dx(1) = velocity;
+% u ~ { 0, 1, 2 }
+%   dx(2) = (u - 1) * force + cos(3 * position) * (-gravity);
 
-% action = 0; % push left
-% if (lb < velocity) & (velocity < ub)
-%   action = 2; % push right
-% end
-% TODO: handle uMode (always min for Backwards Reachability with goal)  
+% u* = argmin_u (deriv{[0, 1]} * dx(2))
+%    = argmin_u (deriv{[0, 1]} (u - 1) * force)
+%    = 0 when deriv{[0, 1]} >= 0, 2 when deriv{[0, 1]} < 0
 
 %% Optimal control
-actions = zeros(size(xs{1}));
-actions((lb < velocity) & (velocity < ub)) = 2;
-uOpt = actions;
+if strcmp(uMode, 'max')
+  uOpt = (deriv{1} >= 0) * 2 + (deriv{1} < 0) * 0;
+elseif strcmp(uMode, 'min')
+  uOpt = (deriv{1} >= 0) * 0 + (deriv{1} < 0) * 2;
+else
+  error('Unknown uMode!')
+end
 
 end
diff --git a/mountain_car.m b/mountain_car.m
@@ -47,17 +47,14 @@ function mountain_car()
 min_position = -1.2;
 max_position = 0.6;
 
-goal_position = 0.5;
-goal_velocity = 0.0;
-
 %% Should we compute the trajectory?
-% compTraj = true;
-compTraj = false;
+compTraj = true;
+% compTraj = false;
 
 %% Grid
 grid_min = [min_position; min_velocity]; % Lower corner of computation domain
 grid_max = [max_position; max_velocity];    % Upper corner of computation domain
-N = [141; 141];         % Number of grid points per dimension
+N = [41; 41];         % Number of grid points per dimension
 grid = createGrid(grid_min, grid_max, N);
 
 % state space dimensions
@@ -69,9 +66,8 @@ function mountain_car()
 % data0 = shapeCylinder(grid, 3, [0; 0; 0], R);
 % also try shapeRectangleByCorners, shapeSphere, etc.
 
-center = [0.55; 0];
-widths = [0.05; 0.01];
-
+% center = [0.55; 0];
+% widths = [0.05; 0.01];
 
 center = [0.55; 0];
 widths = [0.15; 0.03];
@@ -81,7 +77,7 @@ function mountain_car()
 %% time vector
 t0 = 0;
 % changed the time from 2 seconds to 15
-tMax = 20;
+tMax = 60;
 dt = 0.1;
 tau = t0:dt:tMax;
 
@@ -102,7 +98,7 @@ function mountain_car()
 schemeData.grid = grid;
 schemeData.dynSys = mCar;
 % schemeData.accuracy = 'high'; 
-schemeData.accuracy = 'low'; 
+schemeData.accuracy = 'veryHigh'; 
 schemeData.uMode = uMode;
 
 %% Compute value function
@@ -139,7 +135,7 @@ function mountain_car()
   pause
 
   %set the initial state
-  xinit = [2, 2];
+  xinit = [-0.5, 0];
 
   figure(6)
   clf