From e6b3e08b129fd8e1dedd77fa33cbec9d4efa9eae Mon Sep 17 00:00:00 2001
From: Michael Lauria <michael.david.lauria@gmail.com>
Date: Mon, 9 Aug 2021 14:41:08 -0600
Subject: [PATCH] fix optCtrl using tutorial in paper appendix

---
 dynSys/@MountainCarV0/optCtrl.m | 48 +++++++++++++++------------------
 mountain_car.m                  | 32 ++++++++++------------
 2 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/dynSys/@MountainCarV0/optCtrl.m b/dynSys/@MountainCarV0/optCtrl.m
index a358914..b103a19 100644
--- a/dynSys/@MountainCarV0/optCtrl.m
+++ b/dynSys/@MountainCarV0/optCtrl.m
@@ -1,43 +1,39 @@
 function uOpt = optCtrl(obj, t, xs, deriv, uMode, ~)
 % uOpt = optCtrl(obj, t, deriv, uMode, dMode, MIEdims)
 
-position = xs{1};
-velocity = xs{2};
-    
 %% Input processing
 if nargin < 5
   uMode = 'min';
 end
 
+if ~iscell(deriv)
+  deriv = num2cell(deriv);
+end
 
-%% https://github.com/ZhiqingXiao/OpenAIGymSolution/blob/master/MountainCar-v0_close_form/mountaincar_v0_close_form.ipynb 
-%         position, velocity = observation
-%         lb = min(-0.09 * (position + 0.25) ** 2 + 0.03,
-%                 0.3 * (position + 0.9) ** 4 - 0.008)
-%         ub = -0.07 * (position + 0.38) ** 2 + 0.07
-%         if lb < velocity < ub:
-%             action = 2 # push right
-%         else:
-%             action = 0 # push left
-
-% TODO: Use deriv instead of hard-coded values above?
-% TODO: not sure this is working... 
 
+%% Optimal control
 
-lb = min(-0.09 * (position + 0.25)^2 + 0.03, ...
-       0.3 * (position + 0.9)^4 - 0.008);
+% https://arxiv.org/pdf/1709.07523.pdf
+% Appendix 
+% need dot product of deriv and dx = f(x)
+% then argmin terms with action to find optimal control
 
-ub = -0.07 * (position + 0.38)^2 + 0.07;
+% we have    
+%   dx(1) = velocity;
+% u ~ { 0, 1, 2 }
+%   dx(2) = (u - 1) * force + cos(3 * position) * (-gravity);
 
-% action = 0; % push left
-% if (lb < velocity) & (velocity < ub)
-%   action = 2; % push right
-% end
-% TODO: handle uMode (always min for Backwards Reachability with goal)  
+% u* = argmin_u (deriv{[0, 1]} * dx(2))
+%    = argmin_u (deriv{[0, 1]} (u - 1) * force)
+%    = 0 when deriv{[0, 1]} >= 0, 2 when deriv{[0, 1]} < 0
 
 %% Optimal control
-actions = zeros(size(xs{1}));
-actions((lb < velocity) & (velocity < ub)) = 2;
-uOpt = actions;
+if strcmp(uMode, 'max')
+  uOpt = (deriv{1} >= 0) * 2 + (deriv{1} < 0) * 0;
+elseif strcmp(uMode, 'min')
+  uOpt = (deriv{1} >= 0) * 0 + (deriv{1} < 0) * 2;
+else
+  error('Unknown uMode!')
+end
 
 end
\ No newline at end of file
diff --git a/mountain_car.m b/mountain_car.m
index 99e7bba..c42bac3 100644
--- a/mountain_car.m
+++ b/mountain_car.m
@@ -47,17 +47,14 @@ function mountain_car()
 min_position = -1.2;
 max_position = 0.6;
 
-goal_position = 0.5;
-goal_velocity = 0.0;
-
 %% Should we compute the trajectory?
-% compTraj = true;
-compTraj = false;
+compTraj = true;
+% compTraj = false;
 
 %% Grid
 grid_min = [min_position; min_velocity]; % Lower corner of computation domain
 grid_max = [max_position; max_velocity];    % Upper corner of computation domain
-N = [141; 141];         % Number of grid points per dimension
+N = [181; 181];         % Number of grid points per dimension
 grid = createGrid(grid_min, grid_max, N);
 
 % state space dimensions
@@ -69,19 +66,18 @@ function mountain_car()
 % data0 = shapeCylinder(grid, 3, [0; 0; 0], R);
 % also try shapeRectangleByCorners, shapeSphere, etc.
 
-center = [0.55; 0];
-widths = [0.05; 0.01];
-
+% center = [0.55; 0];
+% widths = [0.05; 0.01];
 
-center = [0.55; 0];
-widths = [0.15; 0.03];
+center = [0.5; 0];
+widths = [0.1; 0.02];
 
 data0 = shapeRectangleByCenter(grid, center, widths);
 
 %% time vector
 t0 = 0;
 % changed the time from 2 seconds to 15
-tMax = 20;
+tMax = 80;
 dt = 0.1;
 tau = t0:dt:tMax;
 
@@ -102,7 +98,7 @@ function mountain_car()
 schemeData.grid = grid;
 schemeData.dynSys = mCar;
 % schemeData.accuracy = 'high'; 
-schemeData.accuracy = 'low'; 
+schemeData.accuracy = 'veryHigh'; 
 schemeData.uMode = uMode;
 
 %% Compute value function
@@ -139,19 +135,19 @@ function mountain_car()
   pause
   
   %set the initial state
-  xinit = [2, 2];
+  xinit = [-0.5, 0];
   
   figure(6)
   clf
-  h = visSetIm(g, data(:,:,:,end));
-  h.FaceAlpha = .3;
+  h = visSetIm(grid, data(:,:,:,end));
+%   h.FaceAlpha = .3;
   hold on
   s = scatter3(xinit(1), xinit(2), xinit(3));
   s.SizeData = 70;
   
   %check if this initial state is in the BRS/BRT
   %value = eval_u(g, data, x)
-  value = eval_u(g,data(:,:,:,end),xinit);
+  value = eval_u(grid,data(:,:,:,end),xinit);
   
   if value <= 0 %if initial state is in BRS/BRT
     % find optimal trajectory
@@ -171,7 +167,7 @@ function mountain_car()
     % [traj, traj_tau] = ...
     % computeOptTraj(g, data, tau, dynSys, extraArgs)
     [traj, traj_tau] = ...
-      computeOptTraj(g, dataTraj, tau2, mCar, TrajextraArgs);
+      computeOptTraj(grid, dataTraj, tau2, mCar, TrajextraArgs);
   else
     error(['Initial state is not in the BRS/BRT! It have a value of ' num2str(value,2)])
   end