-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathacrobot.py
executable file
·235 lines (188 loc) · 7.51 KB
/
acrobot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/usr/bin/env python
##
#
# Swing-up control of an acrobot
#
##
import numpy as np
from pydrake.all import *
from ilqr import IterativeLinearQuadraticRegulator
import time
import utils_derivs_interpolation
####################################
# Parameters
####################################
T = 3 # total simulation time (seconds)
dt = 0.004 # simulation timestep (seconds)
# Parameters for derivative interpolation (key-point approximation of dynamics derivatives)
use_derivative_interpolation = False # Use derivative interpolation
keypoint_method = 'adaptiveJerk' # 'setInterval', 'adaptiveJerk' or 'iterativeError'
minN = 5 # Minimum interval between key-points
maxN = 100 # Maximum interval between key-points
jerk_threshold = 0.0007 # Jerk threshold to trigger new key-point (only used in adaptiveJerk)
iterative_error_threshold = 0.00005 # Error threshold to trigger new key-point (only used in iterativeError)
# Solver method
# must be "ilqr" or "sqp"
method = "ilqr"
MPC = False # MPC only works with ilqr for now
meshcat_visualisation = False # Visualisation with meshcat or drake visualizer
# Initial state: [theta1, theta2, theta1_dot, theta2_dot], hanging straight down
x0 = np.array([0,0,0,0])
# Target state: both links upright, at rest
x_nom = np.array([np.pi,0,0,0])
# Quadratic cost: int_{0}^{T} (x'Qx + u'Ru) dt + x_T'*Qf*x_T
Q = 0.01*np.diag([0,0,1,1])
R = 0.01*np.eye(1)
Qf = 100*np.diag([1,1,1,1])
####################################
# Tools for system setup
####################################
def create_system_model(plant):
    """Load the Drake acrobot model into *plant* and finalize it.

    Args:
        plant: a MultibodyPlant to populate with the acrobot URDF.

    Returns:
        The same plant, finalized (no further models can be added).
    """
    urdf = FindResourceOrThrow("drake/examples/acrobot/Acrobot.urdf")
    # The returned model indices are not needed afterwards, so don't bind them.
    Parser(plant=plant).AddModels(urdf)
    plant.Finalize()
    return plant
####################################
# Create system diagram
####################################
builder = DiagramBuilder()
plant, scene_graph = AddMultibodyPlantSceneGraph(builder, dt)
plant = create_system_model(plant)
assert plant.geometry_source_is_registered()
# Constant zero-torque source feeds the single actuator: this diagram is only
# used for playback/visualisation, not for control.
controller = builder.AddSystem(ConstantVectorSource(np.zeros(1)))
builder.Connect(
    controller.get_output_port(),
    plant.get_actuation_input_port())
if(meshcat_visualisation):
    meshcat = StartMeshcat()
    visualizer = MeshcatVisualizer.AddToBuilder(
        builder, scene_graph, meshcat,
        MeshcatVisualizerParams(role=Role.kPerception, prefix="visual"))
else:
    # Fall back to the legacy Drake visualizer.
    DrakeVisualizer().AddToBuilder(builder, scene_graph)
    ConnectContactResultsToDrakeVisualizer(builder, plant, scene_graph)
diagram = builder.Build()
diagram_context = diagram.CreateDefaultContext()
plant_context = diagram.GetMutableSubsystemContext(
    plant, diagram_context)
#####################################
# Solve Trajectory Optimization
#####################################
# System model for the trajectory optimizer: a standalone plant (no scene
# graph / visualisation) so the optimizer can evaluate dynamics cheaply.
plant_ = MultibodyPlant(dt)
plant_ = create_system_model(plant_)
input_port_index = plant_.get_actuation_input_port().get_index()
#-----------------------------------------
# DDP method
#-----------------------------------------
def solve_ilqr(solver, x0, u_guess):
    """Run the iLQR solver from a given initial state and control guess.

    Args:
        solver: an IterativeLinearQuadraticRegulator instance.
        x0: initial state vector.
        u_guess: initial guess for the control input sequence.

    Returns:
        Tuple (states, inputs, solve_time, optimal_cost) as produced by
        solver.Solve().
    """
    solver.SetInitialState(x0)
    solver.SetInitialGuess(u_guess)
    xs, us, wall_time, cost = solver.Solve()
    return xs, us, wall_time, cost
if method == "ilqr":
    # Set up optimizer
    num_steps = int(T/dt)
    if use_derivative_interpolation:
        interpolation_method = utils_derivs_interpolation.derivs_interpolation(keypoint_method, minN, maxN, jerk_threshold, iterative_error_threshold)
    else:
        interpolation_method = None
    ilqr = IterativeLinearQuadraticRegulator(plant_, num_steps,
            input_port_index=input_port_index,
            beta=0.5, derivs_keypoint_method = interpolation_method)
    # Define the problem
    ilqr.SetTargetState(x_nom)
    # Running cost is scaled by dt so the discrete sum approximates the
    # continuous-time cost integral.
    ilqr.SetRunningCost(dt*Q, dt*R)
    ilqr.SetTerminalCost(Qf)
    # Set initial guess (one actuator, num_steps-1 control intervals)
    u_guess = np.zeros((1,num_steps-1))
    if MPC:
        # MPC parameters
        num_resolves = 50 # total number of times to re-solve the optimization problem
        replan_steps = 2 # number of timesteps after which to move the horizon and
                         # re-solve the MPC problem (>0)
        total_num_steps = num_steps + replan_steps*num_resolves
        total_T = total_num_steps*dt
        states = np.zeros((4,total_num_steps))
        # Solve to get an initial trajectory
        x, u, _, _ = solve_ilqr(ilqr, x0, u_guess)
        states[:,0:num_steps] = x
        for i in range(num_resolves):
            # Set new state and control input: warm-start the next solve by
            # shifting the previous solution forward by replan_steps.
            last_u = u[:,-1]
            u_guess = np.block([
                u[:,replan_steps:], # keep same control inputs from last optimal sol'n
                np.repeat(last_u[np.newaxis].T,replan_steps,axis=1) # for new timesteps copy
                ]) # the last known control input
            x0 = x[:,replan_steps]
            # Re-solve the optimization
            x, u, _, _ = solve_ilqr(ilqr, x0, u_guess)
            # Save the result for playback (each solve overwrites the tail of
            # the previous one, keeping only its first replan_steps columns)
            start_idx = (i+1)*replan_steps
            end_idx = start_idx + num_steps
            states[:,start_idx:end_idx] = x
        timesteps = np.arange(0.0,total_T,dt)
    else:
        states, inputs, solve_time, optimal_cost = solve_ilqr(ilqr, x0, u_guess)
        print(f"Solved in {solve_time} seconds using iLQR")
        print(f"Optimal cost: {optimal_cost}")
        timesteps = np.arange(0.0,T,dt)
#-----------------------------------------
# Direct Transcription method
#-----------------------------------------
elif method == "sqp":
    context_ = plant_.CreateDefaultContext()
    # Set up the solver object
    trajopt = DirectTranscription(
        plant_, context_,
        input_port_index=input_port_index,
        num_time_samples=int(T/dt))
    # Add constraints: pin the first knot point to the initial state
    x = trajopt.state()
    u = trajopt.input()
    x_init = trajopt.initial_state()
    trajopt.prog().AddConstraint(eq( x_init, x0 ))
    # Quadratic tracking cost about the nominal (upright) state
    x_err = x - x_nom
    trajopt.AddRunningCost(x_err.T@Q@x_err + u.T@R@u)
    trajopt.AddFinalCost(x_err.T@Qf@x_err)
    # Solve the optimization problem
    st = time.time()
    res = Solve(trajopt.prog())
    solve_time = time.time() - st
    assert res.is_success(), "trajectory optimizer failed"
    solver_name = res.get_solver_id().name()
    optimal_cost = res.get_optimal_cost()
    print(f"Solved in {solve_time} seconds using SQP via {solver_name}")
    print(f"Optimal cost: {optimal_cost}")
    # Extract the solution
    timesteps = trajopt.GetSampleTimes(res)
    states = trajopt.GetStateSamples(res)
    inputs = trajopt.GetInputSamples(res)
else:
    raise ValueError("Unknown method %s"%method)
#####################################
# Playback
#####################################
def playback(states, timesteps):
    """
    Convenience function for visualising the given trajectory on repeat.

    Relies on diagram, diagram_context, plant, and plant_context
    being defined outside of the scope of this function and connected
    to the Drake visualizer. Never returns: loops forever until interrupted.

    Args:
        states: (num_states, num_steps) array of state samples.
        timesteps: sample times corresponding to the columns of states.
    """
    while True:
        # Just keep playing back the trajectory
        for i, t in enumerate(timesteps):
            diagram_context.SetTime(t)
            plant.SetPositionsAndVelocities(plant_context, states[:, i])
            diagram.ForcedPublish(diagram_context)
            # Sleep slightly less than dt to roughly compensate for the
            # time spent publishing, so playback stays near real time.
            time.sleep(dt - 3e-4)
        # Pause briefly between replays
        time.sleep(1)

playback(states, timesteps)