Skip to content

Commit

Permalink
fix optCtrl using tutorial in paper appendix
Browse files Browse the repository at this point in the history
  • Loading branch information
mikedeltalima committed Aug 9, 2021
1 parent af20dc6 commit cf62bee
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 38 deletions.
48 changes: 22 additions & 26 deletions dynSys/@MountainCarV0/optCtrl.m
Original file line number Diff line number Diff line change
@@ -1,43 +1,39 @@
function uOpt = optCtrl(obj, t, xs, deriv, uMode, ~)
% uOpt = optCtrl(obj, t, deriv, uMode, dMode, MIEdims)

position = xs{1};
velocity = xs{2};

%% Input processing
if nargin < 5
uMode = 'min';
end

if ~iscell(deriv)
deriv = num2cell(deriv);
end

%% https://github.com/ZhiqingXiao/OpenAIGymSolution/blob/master/MountainCar-v0_close_form/mountaincar_v0_close_form.ipynb
% position, velocity = observation
% lb = min(-0.09 * (position + 0.25) ** 2 + 0.03,
% 0.3 * (position + 0.9) ** 4 - 0.008)
% ub = -0.07 * (position + 0.38) ** 2 + 0.07
% if lb < velocity < ub:
% action = 2 # push right
% else:
% action = 0 # push left

% TODO: Use deriv instead of hard-coded values above?
% TODO: not sure this is working...

%% Optimal control

lb = min(-0.09 * (position + 0.25)^2 + 0.03, ...
0.3 * (position + 0.9)^4 - 0.008);
% https://arxiv.org/pdf/1709.07523.pdf
% Appendix
% need dot product of deriv and dx = f(x)
% then argmin terms with action to find optimal control

ub = -0.07 * (position + 0.38)^2 + 0.07;
% we have
% dx(1) = velocity;
% u ~ { 0, 1, 2 }
% dx(2) = (u - 1) * force + cos(3 * position) * (-gravity);

% action = 0; % push left
% if (lb < velocity) & (velocity < ub)
% action = 2; % push right
% end
% TODO: handle uMode (always min for Backwards Reachability with goal)
% u* = argmin_u (deriv{[0, 1]} * dx(2))
% = argmin_u (deriv{[0, 1]} (u - 1) * force)
% = 0 when deriv{[0, 1]} >= 0, 2 when deriv{[0, 1]} < 0

%% Optimal control
actions = zeros(size(xs{1}));
actions((lb < velocity) & (velocity < ub)) = 2;
uOpt = actions;
if strcmp(uMode, 'max')
uOpt = (deriv{1} >= 0) * 2 + (deriv{1} < 0) * 0;
elseif strcmp(uMode, 'min')
uOpt = (deriv{1} >= 0) * 0 + (deriv{1} < 0) * 2;
else
error('Unknown uMode!')
end

end
20 changes: 8 additions & 12 deletions mountain_car.m
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,14 @@ function mountain_car()
min_position = -1.2;
max_position = 0.6;

goal_position = 0.5;
goal_velocity = 0.0;

%% Should we compute the trajectory?
% compTraj = true;
compTraj = false;
compTraj = true;
% compTraj = false;

%% Grid
grid_min = [min_position; min_velocity]; % Lower corner of computation domain
grid_max = [max_position; max_velocity]; % Upper corner of computation domain
N = [141; 141]; % Number of grid points per dimension
N = [41; 41]; % Number of grid points per dimension
grid = createGrid(grid_min, grid_max, N);

% state space dimensions
Expand All @@ -69,9 +66,8 @@ function mountain_car()
% data0 = shapeCylinder(grid, 3, [0; 0; 0], R);
% also try shapeRectangleByCorners, shapeSphere, etc.

center = [0.55; 0];
widths = [0.05; 0.01];

% center = [0.55; 0];
% widths = [0.05; 0.01];

center = [0.55; 0];
widths = [0.15; 0.03];
Expand All @@ -81,7 +77,7 @@ function mountain_car()
%% time vector
t0 = 0;
% changed the time from 2 seconds to 15
tMax = 20;
tMax = 60;
dt = 0.1;
tau = t0:dt:tMax;

Expand All @@ -102,7 +98,7 @@ function mountain_car()
schemeData.grid = grid;
schemeData.dynSys = mCar;
% schemeData.accuracy = 'high';
schemeData.accuracy = 'low';
schemeData.accuracy = 'veryHigh';
schemeData.uMode = uMode;

%% Compute value function
Expand Down Expand Up @@ -139,7 +135,7 @@ function mountain_car()
pause

%set the initial state
xinit = [2, 2];
xinit = [-0.5, 0];

figure(6)
clf
Expand Down

0 comments on commit cf62bee

Please sign in to comment.