-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.html
504 lines (504 loc) · 27.8 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.40.1 (20161225.0304)
-->
<!-- Title: G Pages: 1 -->
<svg width="1323pt" height="746pt"
viewBox="0.00 0.00 1322.94 746.43" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 742.4264)">
<!-- Graph-level title: used as the hover tooltip for the canvas group and exposed to assistive technology, so it names the diagram rather than repeating the bare graph id "G". -->
<title>Reinforcement Learning concept map</title>
<!-- Opaque white page background covering the whole viewBox. The outline is switched off with stroke="none": the keyword "transparent" is not a valid paint value under the SVG 1.1 DTD declared at the top of this document, and renderers that reject it may fall back to a visible outline. -->
<polygon fill="#ffffff" stroke="none" points="-4,4 -4,-742.4264 1318.9418,-742.4264 1318.9418,4 -4,4"/>
<!-- Reinforcement Learning -->
<g id="node1" class="node">
<title>Reinforcement Learning</title>
<g id="a_node1"><a xlink:href="./html/reinforcement_learning.html" xlink:title="Reinforcement Learning">
<ellipse fill="none" stroke="#000000" cx="902.6442" cy="-717.2132" rx="149.8133" ry="21.4286"/>
<text text-anchor="middle" x="902.6442" y="-712.2132" font-family="Times,serif" font-size="20.00" fill="#000000">Reinforcement Learning</text>
</a>
</g>
</g>
<!-- Policy Optimization -->
<g id="node2" class="node">
<title>Policy Optimization</title>
<g id="a_node2"><a xlink:href="./html/policy_optimization.html" xlink:title="Policy Optimization">
<ellipse fill="none" stroke="#000000" cx="902.6442" cy="-627" rx="82.5854" ry="18"/>
<text text-anchor="middle" x="902.6442" y="-623.3" font-family="Times,serif" font-size="14.00" fill="#000000">Policy Optimization</text>
</a>
</g>
</g>
<!-- Reinforcement Learning->Policy Optimization -->
<g id="edge1" class="edge">
<title>Reinforcement Learning->Policy Optimization</title>
<path fill="none" stroke="#000000" d="M902.6442,-695.8339C902.6442,-683.7849 902.6442,-668.5781 902.6442,-655.5239"/>
<polygon fill="#000000" stroke="#000000" points="906.1443,-655.1813 902.6442,-645.1814 899.1443,-655.1814 906.1443,-655.1813"/>
<text text-anchor="middle" x="917.6442" y="-666.8" font-family="Times,serif" font-size="14.00" fill="#000000">seeks</text>
</g>
<!-- Value-Based -->
<g id="node3" class="node">
<title>Value-Based</title>
<g id="a_node3"><a xlink:href="./html/value_based.html" xlink:title="Value-Based">
<ellipse fill="none" stroke="#000000" cx="763.6442" cy="-453" rx="55.7903" ry="18"/>
<text text-anchor="middle" x="763.6442" y="-449.3" font-family="Times,serif" font-size="14.00" fill="#000000">Value-Based</text>
</a>
</g>
</g>
<!-- Policy Optimization->Value-Based -->
<g id="edge2" class="edge">
<title>Policy Optimization->Value-Based</title>
<path fill="none" stroke="#000000" d="M877.5353,-609.7696C860.0903,-596.9271 837.11,-578.1833 820.6442,-558 801.1132,-534.0596 785.0127,-502.4299 774.9631,-480.1646"/>
<polygon fill="#000000" stroke="#000000" points="778.1396,-478.6932 770.9076,-470.9505 771.7327,-481.5132 778.1396,-478.6932"/>
<text text-anchor="middle" x="830.1442" y="-536.3" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Policy-Based -->
<g id="node4" class="node">
<title>Policy-Based</title>
<g id="a_node4"><a xlink:href="./html/policy_based.html" xlink:title="Policy-Based">
<ellipse fill="none" stroke="#000000" cx="1000.6442" cy="-453" rx="57.6901" ry="18"/>
<text text-anchor="middle" x="1000.6442" y="-449.3" font-family="Times,serif" font-size="14.00" fill="#000000">Policy-Based</text>
</a>
</g>
</g>
<!-- Policy Optimization->Policy-Based -->
<g id="edge3" class="edge">
<title>Policy Optimization->Policy-Based</title>
<path fill="none" stroke="#000000" d="M922.0604,-609.5028C935.6968,-596.3918 953.5835,-577.4337 965.6442,-558 979.2229,-536.1201 978.5584,-528.4486 986.6442,-504 989.102,-496.5685 991.4753,-488.4309 993.548,-480.8815"/>
<polygon fill="#000000" stroke="#000000" points="996.9686,-481.6377 996.1676,-471.0732 990.2057,-479.8314 996.9686,-481.6377"/>
<text text-anchor="middle" x="991.1442" y="-536.3" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Actor-Critic -->
<g id="node5" class="node">
<title>Actor-Critic</title>
<g id="a_node5"><a xlink:href="./html/actor_critic.html" xlink:title="Actor-Critic">
<ellipse fill="none" stroke="#000000" cx="902.6442" cy="-540" rx="53.8905" ry="18"/>
<text text-anchor="middle" x="902.6442" y="-536.3" font-family="Times,serif" font-size="14.00" fill="#000000">Actor-Critic</text>
</a>
</g>
</g>
<!-- Policy Optimization->Actor-Critic -->
<g id="edge4" class="edge">
<title>Policy Optimization->Actor-Critic</title>
<path fill="none" stroke="#000000" d="M902.6442,-608.9735C902.6442,-597.1918 902.6442,-581.5607 902.6442,-568.1581"/>
<polygon fill="#000000" stroke="#000000" points="906.1443,-568.0033 902.6442,-558.0034 899.1443,-568.0034 906.1443,-568.0033"/>
<text text-anchor="middle" x="912.1442" y="-579.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Dynamic Programming -->
<g id="node12" class="node">
<title>Dynamic Programming</title>
<g id="a_node12"><a xlink:href="./html/dynamic_programming.html" xlink:title="Dynamic Programming">
<ellipse fill="none" stroke="#0000ff" cx="364.6442" cy="-366" rx="94.7833" ry="18"/>
<text text-anchor="middle" x="364.6442" y="-362.3" font-family="Times,serif" font-size="14.00" fill="#000000">Dynamic Programming</text>
</a>
</g>
</g>
<!-- Value-Based->Dynamic Programming -->
<g id="edge6" class="edge">
<title>Value-Based->Dynamic Programming</title>
<path fill="none" stroke="#000000" d="M717.0929,-442.8497C648.2916,-427.8479 518.837,-399.621 437.424,-381.8693"/>
<polygon fill="#000000" stroke="#000000" points="437.7662,-378.3617 427.2501,-379.6509 436.2749,-385.201 437.7662,-378.3617"/>
<text text-anchor="middle" x="598.1442" y="-405.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- TD-lambda -->
<g id="node28" class="node">
<title>TD-lambda</title>
<g id="a_node28"><a xlink:href="./html/td_lambda.html" xlink:title="TD-lambda">
<ellipse fill="none" stroke="#ff0000" cx="760.6442" cy="-366" rx="51.9908" ry="18"/>
<text text-anchor="middle" x="760.6442" y="-362.3" font-family="Times,serif" font-size="14.00" fill="#000000">TD-lambda</text>
</a>
</g>
</g>
<!-- Value-Based->TD-lambda -->
<g id="edge5" class="edge">
<title>Value-Based->TD-lambda</title>
<path fill="none" stroke="#000000" d="M763.0226,-434.9735C762.6163,-423.1918 762.0773,-407.5607 761.6151,-394.1581"/>
<polygon fill="#000000" stroke="#000000" points="765.1076,-393.8768 761.265,-384.0034 758.1118,-394.1181 765.1076,-393.8768"/>
<text text-anchor="middle" x="772.1442" y="-405.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Gradient-Based -->
<g id="node6" class="node">
<title>Gradient-Based</title>
<g id="a_node6"><a xlink:href="./html/gradient_based.html" xlink:title="Gradient-Based">
<ellipse fill="none" stroke="#000000" cx="1001.6442" cy="-366" rx="65.7887" ry="18"/>
<text text-anchor="middle" x="1001.6442" y="-362.3" font-family="Times,serif" font-size="14.00" fill="#000000">Gradient-Based</text>
</a>
</g>
</g>
<!-- Policy-Based->Gradient-Based -->
<g id="edge7" class="edge">
<title>Policy-Based->Gradient-Based</title>
<path fill="none" stroke="#000000" d="M1000.8514,-434.9735C1000.9868,-423.1918 1001.1665,-407.5607 1001.3205,-394.1581"/>
<polygon fill="#000000" stroke="#000000" points="1004.822,-394.043 1001.4372,-384.0034 997.8224,-393.9624 1004.822,-394.043"/>
<text text-anchor="middle" x="1011.1442" y="-405.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Sampling-Based -->
<g id="node7" class="node">
<title>Sampling-Based</title>
<g id="a_node7"><a xlink:href="./html/sampling_based.html" xlink:title="Sampling-Based">
<ellipse fill="none" stroke="#000000" cx="1211.6442" cy="-366" rx="69.5877" ry="18"/>
<text text-anchor="middle" x="1211.6442" y="-362.3" font-family="Times,serif" font-size="14.00" fill="#000000">Sampling-Based</text>
</a>
</g>
</g>
<!-- Policy-Based->Sampling-Based -->
<g id="edge8" class="edge">
<title>Policy-Based->Sampling-Based</title>
<path fill="none" stroke="#000000" d="M1035.9608,-438.4382C1071.3265,-423.8561 1125.9491,-401.334 1164.8929,-385.2766"/>
<polygon fill="#000000" stroke="#000000" points="1166.524,-388.39 1174.4348,-381.3423 1163.8556,-381.9185 1166.524,-388.39"/>
<text text-anchor="middle" x="1129.1442" y="-405.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Actor-Critic->Value-Based -->
<g id="edge9" class="edge">
<title>Actor-Critic->Value-Based</title>
<path fill="none" stroke="#000000" d="M877.1494,-524.0429C854.9521,-510.1496 822.581,-489.8885 798.0837,-474.5557"/>
<polygon fill="#000000" stroke="#000000" points="799.7815,-471.4893 789.448,-469.1506 796.0677,-477.4229 799.7815,-471.4893"/>
<text text-anchor="middle" x="859.6442" y="-492.8" font-family="Times,serif" font-size="14.00" fill="#000000">blends</text>
</g>
<!-- Actor-Critic->Policy-Based -->
<g id="edge10" class="edge">
<title>Actor-Critic->Policy-Based</title>
<path fill="none" stroke="#000000" d="M917.4443,-522.671C926.5669,-512.4097 938.7591,-499.421 950.6442,-489 955.9398,-484.3567 961.8189,-479.7205 967.6312,-475.3948"/>
<polygon fill="#000000" stroke="#000000" points="969.9786,-478.0162 976.0231,-469.3147 965.8716,-472.3476 969.9786,-478.0162"/>
<text text-anchor="middle" x="968.6442" y="-492.8" font-family="Times,serif" font-size="14.00" fill="#000000">blends</text>
</g>
<!-- PILCO -->
<g id="node11" class="node">
<title>PILCO</title>
<g id="a_node11"><a xlink:href="./html/pilco.html" xlink:title="PILCO">
<ellipse fill="none" stroke="#0000ff" cx="1126.6442" cy="-279" rx="36.2938" ry="18"/>
<text text-anchor="middle" x="1126.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">PILCO</text>
</a>
</g>
</g>
<!-- Gradient-Based->PILCO -->
<g id="edge16" class="edge">
<title>Gradient-Based->PILCO</title>
<path fill="none" stroke="#000000" d="M1025.7454,-349.2255C1046.163,-335.0149 1075.507,-314.5915 1097.2911,-299.4297"/>
<polygon fill="#000000" stroke="#000000" points="1099.3197,-302.2822 1105.528,-293.6968 1095.3209,-296.5368 1099.3197,-302.2822"/>
<text text-anchor="middle" x="1081.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- LQR -->
<g id="node16" class="node">
<title>LQR</title>
<g id="a_node16"><a xlink:href="./html/lqr.html" xlink:title="LQR">
<ellipse fill="none" stroke="#0000ff" cx="892.6442" cy="-279" rx="28.6953" ry="18"/>
<text text-anchor="middle" x="892.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">LQR</text>
</a>
</g>
</g>
<!-- Gradient-Based->LQR -->
<g id="edge17" class="edge">
<title>Gradient-Based->LQR</title>
<path fill="none" stroke="#000000" d="M980.1093,-348.8116C962.3825,-334.6627 937.2025,-314.5649 918.4082,-299.5639"/>
<polygon fill="#000000" stroke="#000000" points="920.528,-296.7777 910.5289,-293.2749 916.1612,-302.2487 920.528,-296.7777"/>
<text text-anchor="middle" x="964.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Policy Gradient -->
<g id="node18" class="node">
<title>Policy Gradient</title>
<g id="a_node18"><a xlink:href="./html/policy_gradient.html" xlink:title="Policy Gradient">
<ellipse fill="none" stroke="#ff0000" cx="1005.6442" cy="-279" rx="66.0889" ry="18"/>
<text text-anchor="middle" x="1005.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">Policy Gradient</text>
</a>
</g>
</g>
<!-- Gradient-Based->Policy Gradient -->
<!-- Clickable "has" edge running from the Gradient-Based node down to the Policy Gradient node.
     The curve with its arrowhead and the text label sit in two separate anchors that share one href,
     so clicking either part follows the same link.
     NOTE(review): the link target is named from_policy_gradient_to_policy_based, while the tail of this
     edge is Gradient-Based rather than Policy-Based. Confirm the href points at the intended page. -->
<g id="edge18" class="edge">
<title>Gradient-Based->Policy Gradient</title>
<!-- Anchor 1: edge curve (path) and the arrowhead at the Policy Gradient end (polygon). -->
<g id="a_edge18"><a xlink:href="./html/from_policy_gradient_to_policy_based.html" xlink:title="has">
<path fill="none" stroke="#000000" d="M1002.473,-347.9735C1003.0147,-336.1918 1003.7333,-320.5607 1004.3496,-307.1581"/>
<polygon fill="#000000" stroke="#000000" points="1007.8534,-307.1536 1004.8164,-297.0034 1000.8607,-306.832 1007.8534,-307.1536"/>
</a>
</g>
<!-- Anchor 2: the "has" label, linked to the same target as the curve. -->
<g id="a_edge18-label"><a xlink:href="./html/from_policy_gradient_to_policy_based.html" xlink:title="has">
<text text-anchor="middle" x="1014.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</a>
</g>
</g>
<!-- CEM -->
<g id="node17" class="node">
<title>CEM</title>
<g id="a_node17"><a xlink:href="./html/cem.html" xlink:title="CEM">
<ellipse fill="none" stroke="#0000ff" cx="1211.6442" cy="-279" rx="30.5947" ry="18"/>
<text text-anchor="middle" x="1211.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">CEM</text>
</a>
</g>
</g>
<!-- Sampling-Based->CEM -->
<g id="edge19" class="edge">
<title>Sampling-Based->CEM</title>
<path fill="none" stroke="#000000" d="M1211.6442,-347.9735C1211.6442,-336.1918 1211.6442,-320.5607 1211.6442,-307.1581"/>
<polygon fill="#000000" stroke="#000000" points="1215.1443,-307.0033 1211.6442,-297.0034 1208.1443,-307.0034 1215.1443,-307.0033"/>
<text text-anchor="middle" x="1221.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- NES -->
<g id="node29" class="node">
<title>NES</title>
<g id="a_node29"><a xlink:href="./html/nes.html" xlink:title="NES">
<ellipse fill="none" stroke="#ff0000" cx="1287.6442" cy="-279" rx="27.0966" ry="18"/>
<text text-anchor="middle" x="1287.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">NES</text>
</a>
</g>
</g>
<!-- Sampling-Based->NES -->
<g id="edge20" class="edge">
<title>Sampling-Based->NES</title>
<path fill="none" stroke="#000000" d="M1227.0239,-348.3943C1238.5885,-335.1559 1254.5477,-316.8868 1267.1883,-302.4165"/>
<polygon fill="#000000" stroke="#000000" points="1269.8775,-304.6582 1273.8206,-294.8244 1264.6057,-300.0529 1269.8775,-304.6582"/>
<text text-anchor="middle" x="1263.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Model-Based -->
<!-- Linked node for Model-Based, drawn on the top row (same cy as the Reinforcement Learning root)
     with a blue outline (#0000ff). Its only edge in this file is Model-Based->RHC/MPC.
     NOTE(review): the same blue outline is used by RHC/MPC, PILCO, LQR, CEM and the Dynamic Programming
     family, so this node presumably doubles as the colour legend for model-based methods.
     Confirm against the .dot source. -->
<g id="node8" class="node">
<title>Model-Based</title>
<g id="a_node8"><a xlink:href="./html/model_based.html" xlink:title="Model-Based">
<ellipse fill="none" stroke="#0000ff" cx="1128.6442" cy="-717.2132" rx="58.4896" ry="18"/>
<text text-anchor="middle" x="1128.6442" y="-713.5132" font-family="Times,serif" font-size="14.00" fill="#000000">Model-Based</text>
</a>
</g>
</g>
<!-- RHC/MPC -->
<g id="node10" class="node">
<title>RHC/MPC</title>
<g id="a_node10"><a xlink:href="./html/rhc_or_mpc.html" xlink:title="RHC/MPC">
<ellipse fill="none" stroke="#0000ff" cx="1128.6442" cy="-627" rx="50.0912" ry="18"/>
<text text-anchor="middle" x="1128.6442" y="-623.3" font-family="Times,serif" font-size="14.00" fill="#000000">RHC/MPC</text>
</a>
</g>
</g>
<!-- Model-Based->RHC/MPC -->
<g id="edge30" class="edge">
<title>Model-Based->RHC/MPC</title>
<path fill="none" stroke="#000000" d="M1128.6442,-698.9572C1128.6442,-686.4849 1128.6442,-669.7062 1128.6442,-655.4884"/>
<polygon fill="#000000" stroke="#000000" points="1132.1443,-655.2288 1128.6442,-645.2288 1125.1443,-655.2289 1132.1443,-655.2288"/>
<text text-anchor="middle" x="1141.1442" y="-666.8" font-family="Times,serif" font-size="14.00" fill="#000000">with</text>
</g>
<!-- Model-Free -->
<!-- Linked node for Model-Free, drawn on the top row (same cy as the Reinforcement Learning root)
     with a red outline (#ff0000). No edge in this file starts or ends at this node.
     NOTE(review): the same red outline is used by nodes such as TD-lambda, Policy Gradient, REINFORCE,
     TRPO, PPO, SARSA and Q-Learning, so this node presumably acts as the colour legend for model-free
     methods. Confirm against the .dot source. -->
<g id="node9" class="node">
<title>Model-Free</title>
<g id="a_node9"><a xlink:href="./html/model_free.html" xlink:title="Model-Free">
<ellipse fill="none" stroke="#ff0000" cx="1257.6442" cy="-717.2132" rx="52.7911" ry="18"/>
<text text-anchor="middle" x="1257.6442" y="-713.5132" font-family="Times,serif" font-size="14.00" fill="#000000">Model-Free</text>
</a>
</g>
</g>
<!-- Value Iteration -->
<g id="node13" class="node">
<title>Value Iteration</title>
<g id="a_node13"><a xlink:href="./html/value_iteration.html" xlink:title="Value Iteration">
<ellipse fill="none" stroke="#0000ff" cx="463.6442" cy="-279" rx="63.8893" ry="18"/>
<text text-anchor="middle" x="463.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">Value Iteration</text>
</a>
</g>
</g>
<!-- Dynamic Programming->Value Iteration -->
<g id="edge14" class="edge">
<title>Dynamic Programming->Value Iteration</title>
<path fill="none" stroke="#000000" d="M384.6783,-348.3943C399.5361,-335.3374 419.9625,-317.3869 436.3179,-303.014"/>
<polygon fill="#000000" stroke="#000000" points="438.7764,-305.5129 443.9777,-296.2827 434.1556,-300.2548 438.7764,-305.5129"/>
<text text-anchor="middle" x="430.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Policy Iteration -->
<g id="node14" class="node">
<title>Policy Iteration</title>
<g id="a_node14"><a xlink:href="./html/policy_iteration.html" xlink:title="Policy Iteration">
<ellipse fill="none" stroke="#0000ff" cx="65.6442" cy="-279" rx="65.7887" ry="18"/>
<text text-anchor="middle" x="65.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">Policy Iteration</text>
</a>
</g>
</g>
<!-- Dynamic Programming->Policy Iteration -->
<g id="edge15" class="edge">
<title>Dynamic Programming->Policy Iteration</title>
<path fill="none" stroke="#000000" d="M312.5624,-350.8458C258.8508,-335.2173 175.4246,-310.9428 120.6604,-295.0081"/>
<polygon fill="#000000" stroke="#000000" points="121.3616,-291.567 110.7819,-292.1337 119.4058,-298.2883 121.3616,-291.567"/>
<text text-anchor="middle" x="243.1442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Bellman Optimality Equation -->
<g id="node15" class="node">
<title>Bellman Optimality Equation</title>
<g id="a_node15"><a xlink:href="./html/bellman.html" xlink:title="Bellman Optimality Equation">
<ellipse fill="none" stroke="#0000ff" cx="265.6442" cy="-279" rx="116.1796" ry="18"/>
<text text-anchor="middle" x="265.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">Bellman Optimality Equation</text>
</a>
</g>
</g>
<!-- Dynamic Programming->Bellman Optimality Equation -->
<g id="edge13" class="edge">
<title>Dynamic Programming->Bellman Optimality Equation</title>
<path fill="none" stroke="#000000" d="M344.61,-348.3943C329.9465,-335.5081 309.8589,-317.8554 293.6139,-303.5794"/>
<polygon fill="#000000" stroke="#000000" points="295.8104,-300.8503 285.9883,-296.8782 291.1896,-306.1084 295.8104,-300.8503"/>
<text text-anchor="middle" x="338.6442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">solves</text>
</g>
<!-- REINFORCE -->
<g id="node19" class="node">
<title>REINFORCE</title>
<g id="a_node19"><a xlink:href="./html/reinforce.html" xlink:title="REINFORCE">
<ellipse fill="none" stroke="#ff0000" cx="1005.6442" cy="-192" rx="59.5901" ry="18"/>
<text text-anchor="middle" x="1005.6442" y="-188.3" font-family="Times,serif" font-size="14.00" fill="#000000">REINFORCE</text>
</a>
</g>
</g>
<!-- Policy Gradient->REINFORCE -->
<g id="edge21" class="edge">
<title>Policy Gradient->REINFORCE</title>
<path fill="none" stroke="#000000" d="M1005.6442,-260.9735C1005.6442,-249.1918 1005.6442,-233.5607 1005.6442,-220.1581"/>
<polygon fill="#000000" stroke="#000000" points="1009.1443,-220.0033 1005.6442,-210.0034 1002.1443,-220.0034 1009.1443,-220.0033"/>
<text text-anchor="middle" x="1015.1442" y="-231.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- TRPO -->
<g id="node20" class="node">
<title>TRPO</title>
<g id="a_node20"><a xlink:href="./html/trpo.html" xlink:title="TRPO">
<ellipse fill="none" stroke="#ff0000" cx="958.6442" cy="-105" rx="33.2948" ry="18"/>
<text text-anchor="middle" x="958.6442" y="-101.3" font-family="Times,serif" font-size="14.00" fill="#000000">TRPO</text>
</a>
</g>
</g>
<!-- REINFORCE->TRPO -->
<g id="edge22" class="edge">
<title>REINFORCE->TRPO</title>
<path fill="none" stroke="#000000" d="M991.5526,-174.2362C987.423,-168.5893 983.0946,-162.197 979.6442,-156 975.5173,-148.5882 971.7318,-140.1985 968.5456,-132.3953"/>
<polygon fill="#000000" stroke="#000000" points="971.7509,-130.9816 964.8497,-122.9427 965.2315,-133.5307 971.7509,-130.9816"/>
<text text-anchor="middle" x="996.6442" y="-144.8" font-family="Times,serif" font-size="14.00" fill="#000000">derive</text>
</g>
<!-- PPO -->
<g id="node21" class="node">
<title>PPO</title>
<g id="a_node21"><a xlink:href="./html/ppo.html" xlink:title="PPO">
<ellipse fill="none" stroke="#ff0000" cx="1003.6442" cy="-18" rx="27" ry="18"/>
<text text-anchor="middle" x="1003.6442" y="-14.3" font-family="Times,serif" font-size="14.00" fill="#000000">PPO</text>
</a>
</g>
</g>
<!-- REINFORCE->PPO -->
<g id="edge23" class="edge">
<title>REINFORCE->PPO</title>
<path fill="none" stroke="#000000" d="M1010.4959,-173.8153C1011.7875,-168.1513 1013.0014,-161.8551 1013.6442,-156 1017.8093,-118.0568 1012.9506,-74.0466 1008.6659,-46.1209"/>
<polygon fill="#000000" stroke="#000000" points="1012.0648,-45.2147 1007.0124,-35.9022 1005.1547,-46.3329 1012.0648,-45.2147"/>
<text text-anchor="middle" x="1032.6442" y="-101.3" font-family="Times,serif" font-size="14.00" fill="#000000">derive</text>
</g>
<!-- TRPO->PPO -->
<!-- Clickable "vs" edge between TRPO and PPO. Unlike the single-headed edges in this graph, the path is
     followed by two arrowhead polygons, one at each end, so the edge reads as a two-way comparison.
     The curve with its arrowheads and the text label are wrapped in two separate anchors that share
     the href from_trpo_to_ppo.html. -->
<g id="edge24" class="edge">
<title>TRPO->PPO</title>
<g id="a_edge24"><a xlink:href="./html/from_trpo_to_ppo.html" xlink:title="vs">
<path fill="none" stroke="#000000" d="M972.4871,-78.2371C978.0807,-67.4226 984.5167,-54.9797 990.0817,-44.2208"/>
<!-- Arrowhead at the TRPO end of the path. -->
<polygon fill="#000000" stroke="#000000" points="969.2361,-76.9041 967.7506,-87.3943 975.4536,-80.1201 969.2361,-76.9041"/>
<!-- Arrowhead at the PPO end of the path. -->
<polygon fill="#000000" stroke="#000000" points="993.2194,-45.7728 994.7049,-35.2827 987.0018,-42.5568 993.2194,-45.7728"/>
</a>
</g>
<!-- The "vs" label, linked to the same target as the curve. -->
<g id="a_edge24-label"><a xlink:href="./html/from_trpo_to_ppo.html" xlink:title="vs">
<text text-anchor="middle" x="991.1442" y="-57.8" font-family="Times,serif" font-size="14.00" fill="#000000">vs</text>
</a>
</g>
</g>
<!-- SARSA -->
<g id="node22" class="node">
<title>SARSA</title>
<g id="a_node22"><a xlink:href="./html/sarsa.html" xlink:title="SARSA">
<ellipse fill="none" stroke="#ff0000" cx="739.6442" cy="-105" rx="38.9931" ry="18"/>
<text text-anchor="middle" x="739.6442" y="-101.3" font-family="Times,serif" font-size="14.00" fill="#000000">SARSA</text>
</a>
</g>
</g>
<!-- On-Policy -->
<g id="node24" class="node">
<title>On-Policy</title>
<g id="a_node24"><a xlink:href="./html/on_policy.html" xlink:title="On-Policy">
<ellipse fill="none" stroke="#ff0000" cx="739.6442" cy="-18" rx="46.5926" ry="18"/>
<text text-anchor="middle" x="739.6442" y="-14.3" font-family="Times,serif" font-size="14.00" fill="#000000">On-Policy</text>
</a>
</g>
</g>
<!-- SARSA->On-Policy -->
<g id="edge27" class="edge">
<title>SARSA->On-Policy</title>
<path fill="none" stroke="#000000" d="M739.6442,-86.9735C739.6442,-75.1918 739.6442,-59.5607 739.6442,-46.1581"/>
<polygon fill="#000000" stroke="#000000" points="743.1443,-46.0033 739.6442,-36.0034 736.1443,-46.0034 743.1443,-46.0033"/>
<text text-anchor="middle" x="744.6442" y="-57.8" font-family="Times,serif" font-size="14.00" fill="#000000">is</text>
</g>
<!-- Q-Learning -->
<g id="node23" class="node">
<title>Q-Learning</title>
<g id="a_node23"><a xlink:href="./html/q_learning.html" xlink:title="Q-Learning">
<ellipse fill="none" stroke="#ff0000" cx="828.6442" cy="-192" rx="51.9908" ry="18"/>
<text text-anchor="middle" x="828.6442" y="-188.3" font-family="Times,serif" font-size="14.00" fill="#000000">Q-Learning</text>
</a>
</g>
</g>
<!-- Q-Learning->SARSA -->
<!-- Clickable "vs" edge between Q-Learning and SARSA. The path is followed by two arrowhead polygons,
     one at each end, so the edge reads as a two-way comparison rather than a one-way relation.
     The curve with its arrowheads and the text label are wrapped in two separate anchors that share
     the href from_q_learning_to_sarsa.html. -->
<g id="edge29" class="edge">
<title>Q-Learning->SARSA</title>
<g id="a_edge29"><a xlink:href="./html/from_q_learning_to_sarsa.html" xlink:title="vs">
<path fill="none" stroke="#000000" d="M803.5907,-167.5095C791.154,-155.3523 776.1947,-140.7291 763.8726,-128.684"/>
<!-- Arrowhead at the Q-Learning end of the path. -->
<polygon fill="#000000" stroke="#000000" points="801.4631,-170.3242 811.0606,-174.8116 806.3563,-165.3185 801.4631,-170.3242"/>
<!-- Arrowhead at the SARSA end of the path. -->
<polygon fill="#000000" stroke="#000000" points="766.1849,-126.0499 756.5873,-121.5624 761.2917,-131.0555 766.1849,-126.0499"/>
</a>
</g>
<!-- The "vs" label, linked to the same target as the curve. -->
<g id="a_edge29-label"><a xlink:href="./html/from_q_learning_to_sarsa.html" xlink:title="vs">
<text text-anchor="middle" x="796.1442" y="-144.8" font-family="Times,serif" font-size="14.00" fill="#000000">vs</text>
</a>
</g>
</g>
<!-- Off-Policy -->
<g id="node25" class="node">
<title>Off-Policy</title>
<g id="a_node25"><a xlink:href="./html/off_policy.html" xlink:title="Off-Policy">
<ellipse fill="none" stroke="#ff0000" cx="844.6442" cy="-105" rx="48.1917" ry="18"/>
<text text-anchor="middle" x="844.6442" y="-101.3" font-family="Times,serif" font-size="14.00" fill="#000000">Off-Policy</text>
</a>
</g>
</g>
<!-- Q-Learning->Off-Policy -->
<g id="edge28" class="edge">
<title>Q-Learning->Off-Policy</title>
<path fill="none" stroke="#000000" d="M831.9594,-173.9735C834.1261,-162.1918 837.0008,-146.5607 839.4657,-133.1581"/>
<polygon fill="#000000" stroke="#000000" points="842.9667,-133.4715 841.3332,-123.0034 836.0821,-132.2053 842.9667,-133.4715"/>
<text text-anchor="middle" x="842.6442" y="-144.8" font-family="Times,serif" font-size="14.00" fill="#000000">is</text>
</g>
<!-- Monte Carlo -->
<g id="node26" class="node">
<title>Monte Carlo</title>
<g id="a_node26"><a xlink:href="./html/monte_carlo.html" xlink:title="Monte Carlo">
<ellipse fill="none" stroke="#ff0000" cx="601.6442" cy="-279" rx="55.7903" ry="18"/>
<text text-anchor="middle" x="601.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">Monte Carlo</text>
</a>
</g>
</g>
<!-- Temporal Difference -->
<g id="node27" class="node">
<title>Temporal Difference</title>
<g id="a_node27"><a xlink:href="./html/temporal_difference.html" xlink:title="Temporal Difference">
<ellipse fill="none" stroke="#ff0000" cx="760.6442" cy="-279" rx="85.2851" ry="18"/>
<text text-anchor="middle" x="760.6442" y="-275.3" font-family="Times,serif" font-size="14.00" fill="#000000">Temporal Difference</text>
</a>
</g>
</g>
<!-- Temporal Difference->SARSA -->
<g id="edge25" class="edge">
<title>Temporal Difference->SARSA</title>
<path fill="none" stroke="#000000" d="M757.0552,-260.9057C754.4096,-247.0253 750.8939,-227.3638 748.6442,-210 745.301,-184.1968 742.8948,-154.6592 741.4163,-133.499"/>
<polygon fill="#000000" stroke="#000000" points="744.891,-133.0039 740.7276,-123.2614 737.9068,-133.4737 744.891,-133.0039"/>
<text text-anchor="middle" x="758.1442" y="-188.3" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- Temporal Difference->Q-Learning -->
<g id="edge26" class="edge">
<title>Temporal Difference->Q-Learning</title>
<path fill="none" stroke="#000000" d="M774.7338,-260.9735C784.5273,-248.4437 797.7237,-231.56 808.6088,-217.6335"/>
<polygon fill="#000000" stroke="#000000" points="811.636,-219.444 815.0366,-209.4097 806.1208,-215.1332 811.636,-219.444"/>
<text text-anchor="middle" x="809.1442" y="-231.8" font-family="Times,serif" font-size="14.00" fill="#000000">has</text>
</g>
<!-- TD-lambda->Monte Carlo -->
<g id="edge11" class="edge">
<title>TD-lambda->Monte Carlo</title>
<path fill="none" stroke="#000000" d="M732.5842,-350.6464C706.6002,-336.4288 667.7683,-315.1811 639.1359,-299.5143"/>
<polygon fill="#000000" stroke="#000000" points="640.6799,-296.3695 630.2273,-294.6398 637.3198,-302.5104 640.6799,-296.3695"/>
<text text-anchor="middle" x="709.6442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">blends</text>
</g>
<!-- TD-lambda->Temporal Difference -->
<g id="edge12" class="edge">
<title>TD-lambda->Temporal Difference</title>
<path fill="none" stroke="#000000" d="M760.6442,-347.9735C760.6442,-336.1918 760.6442,-320.5607 760.6442,-307.1581"/>
<polygon fill="#000000" stroke="#000000" points="764.1443,-307.0033 760.6442,-297.0034 757.1443,-307.0034 764.1443,-307.0033"/>
<text text-anchor="middle" x="778.6442" y="-318.8" font-family="Times,serif" font-size="14.00" fill="#000000">blends</text>
</g>
</g>
</svg>