From e6b3e08b129fd8e1dedd77fa33cbec9d4efa9eae Mon Sep 17 00:00:00 2001 From: Michael Lauria Date: Mon, 9 Aug 2021 14:41:08 -0600 Subject: [PATCH] fix optCtrl using tutorial in paper appendix --- dynSys/@MountainCarV0/optCtrl.m | 48 +++++++++++++++------------------ mountain_car.m | 32 ++++++++++------------ 2 files changed, 36 insertions(+), 44 deletions(-) diff --git a/dynSys/@MountainCarV0/optCtrl.m b/dynSys/@MountainCarV0/optCtrl.m index a358914..b103a19 100644 --- a/dynSys/@MountainCarV0/optCtrl.m +++ b/dynSys/@MountainCarV0/optCtrl.m @@ -1,43 +1,39 @@ function uOpt = optCtrl(obj, t, xs, deriv, uMode, ~) % uOpt = optCtrl(obj, t, deriv, uMode, dMode, MIEdims) -position = xs{1}; -velocity = xs{2}; - %% Input processing if nargin < 5 uMode = 'min'; end +if ~iscell(deriv) + deriv = num2cell(deriv); +end -%% https://github.com/ZhiqingXiao/OpenAIGymSolution/blob/master/MountainCar-v0_close_form/mountaincar_v0_close_form.ipynb -% position, velocity = observation -% lb = min(-0.09 * (position + 0.25) ** 2 + 0.03, -% 0.3 * (position + 0.9) ** 4 - 0.008) -% ub = -0.07 * (position + 0.38) ** 2 + 0.07 -% if lb < velocity < ub: -% action = 2 # push right -% else: -% action = 0 # push left - -% TODO: Use deriv instead of hard-coded values above? -% TODO: not sure this is working... +%% Optimal control -lb = min(-0.09 * (position + 0.25)^2 + 0.03, ... - 0.3 * (position + 0.9)^4 - 0.008); +% https://arxiv.org/pdf/1709.07523.pdf +% Appendix +% need dot product of deriv and dx = f(x) +% then argmin terms with action to find optimal control -ub = -0.07 * (position + 0.38)^2 + 0.07; +% we have +% dx(1) = velocity; +% u ~ { 0, 1, 2 } +% dx(2) = (u - 1) * force + cos(3 * position) * (-gravity); -% action = 0; % push left -% if (lb < velocity) & (velocity < ub) -% action = 2; % push right -% end -% TODO: handle uMode (always min for Backwards Reachability with goal) +% u* = argmin_u (deriv{[0, 1]} * dx(2)) +% = argmin_u (deriv{[0, 1]} (u - 1) * force) +% = 0 when deriv{[0, 1]} >= 0, 2 when deriv{[0, 1]} < 0 %% Optimal control -actions = zeros(size(xs{1})); -actions((lb < velocity) & (velocity < ub)) = 2; -uOpt = actions; +if strcmp(uMode, 'max') + uOpt = (deriv{1} >= 0) * 2 + (deriv{1} < 0) * 0; +elseif strcmp(uMode, 'min') + uOpt = (deriv{1} >= 0) * 0 + (deriv{1} < 0) * 2; +else + error('Unknown uMode!') +end end \ No newline at end of file diff --git a/mountain_car.m b/mountain_car.m index 99e7bba..c42bac3 100644 --- a/mountain_car.m +++ b/mountain_car.m @@ -47,17 +47,14 @@ function mountain_car() min_position = -1.2; max_position = 0.6; -goal_position = 0.5; -goal_velocity = 0.0; - %% Should we compute the trajectory? -% compTraj = true; -compTraj = false; +compTraj = true; +% compTraj = false; %% Grid grid_min = [min_position; min_velocity]; % Lower corner of computation domain grid_max = [max_position; max_velocity]; % Upper corner of computation domain -N = [141; 141]; % Number of grid points per dimension +N = [181; 181]; % Number of grid points per dimension grid = createGrid(grid_min, grid_max, N); % state space dimensions @@ -69,19 +66,18 @@ function mountain_car() % data0 = shapeCylinder(grid, 3, [0; 0; 0], R); % also try shapeRectangleByCorners, shapeSphere, etc. -center = [0.55; 0]; -widths = [0.05; 0.01]; - +% center = [0.55; 0]; +% widths = [0.05; 0.01]; -center = [0.55; 0]; -widths = [0.15; 0.03]; +center = [0.5; 0]; +widths = [0.1; 0.02]; data0 = shapeRectangleByCenter(grid, center, widths); %% time vector t0 = 0; % changed the time from 2 seconds to 15 -tMax = 20; +tMax = 80; dt = 0.1; tau = t0:dt:tMax; @@ -102,7 +98,7 @@ function mountain_car() schemeData.grid = grid; schemeData.dynSys = mCar; % schemeData.accuracy = 'high'; -schemeData.accuracy = 'low'; +schemeData.accuracy = 'veryHigh'; schemeData.uMode = uMode; %% Compute value function @@ -139,19 +135,19 @@ function mountain_car() pause %set the initial state - xinit = [2, 2]; + xinit = [-0.5, 0]; figure(6) clf - h = visSetIm(g, data(:,:,:,end)); - h.FaceAlpha = .3; + h = visSetIm(grid, data(:,:,:,end)); +% h.FaceAlpha = .3; hold on s = scatter3(xinit(1), xinit(2), xinit(3)); s.SizeData = 70; %check if this initial state is in the BRS/BRT %value = eval_u(g, data, x) - value = eval_u(g,data(:,:,:,end),xinit); + value = eval_u(grid,data(:,:,:,end),xinit); if value <= 0 %if initial state is in BRS/BRT % find optimal trajectory @@ -171,7 +167,7 @@ function mountain_car() % [traj, traj_tau] = ... % computeOptTraj(g, data, tau, dynSys, extraArgs) [traj, traj_tau] = ... - computeOptTraj(g, dataTraj, tau2, mCar, TrajextraArgs); + computeOptTraj(grid, dataTraj, tau2, mCar, TrajextraArgs); else error(['Initial state is not in the BRS/BRT! It have a value of ' num2str(value,2)]) end