forked from udacity/MLND_CN_P5_Reinforcement_Learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRobot.py
122 lines (100 loc) · 3.53 KB
/
Robot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import random
class Robot(object):
    """A tabular Q-learning agent that navigates a maze.

    The robot keeps a Q-table mapping state -> {action: q_value}, explores
    with an epsilon-greedy policy while learning, and acts purely greedily
    while testing.
    """

    def __init__(self, maze, alpha=0.5, gamma=0.9, epsilon0=0.5):
        """
        Args:
            maze: environment object; must expose ``valid_actions``,
                ``sense_robot()`` and ``move_robot(action)``.
            alpha: learning rate for the Q-value update.
            gamma: discount factor for future rewards.
            epsilon0: initial exploration rate.
        """
        self.maze = maze
        self.valid_actions = self.maze.valid_actions
        self.state = None
        self.action = None

        # Set Parameters of the Learning Robot
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon0 = epsilon0
        self.epsilon = epsilon0
        self.t = 0  # number of learning steps taken; drives epsilon decay

        self.Qtable = {}
        # BUG FIX: default both modes to False so choose_action() never hits
        # an undefined attribute if it is called before set_status().
        self.learning = False
        self.testing = False
        self.reset()

    def reset(self):
        """
        Reset the robot: re-sense the current state and make sure the
        Q-table has a row for it.
        """
        self.state = self.sense_state()
        self.create_Qtable_line(self.state)

    def set_status(self, learning=False, testing=False):
        """
        Determine whether the robot is learning its q table, or
        executing the testing procedure.
        """
        self.learning = learning
        self.testing = testing

    def update_parameter(self):
        """
        Update the exploration rate: zero while testing (no random moves),
        harmonically decayed while learning. Returns the new epsilon.
        """
        if self.testing:
            # 1. No random choice when testing: act purely greedily.
            self.epsilon = 0.0
        else:
            # 2. Decay exploration as training progresses so the policy
            # gradually shifts from exploring to exploiting.
            self.t += 1
            self.epsilon = self.epsilon0 / (1.0 + self.t)
        return self.epsilon

    def sense_state(self):
        """
        Get the current state of the robot (its location in the maze).
        """
        # NOTE(review): assumes the maze exposes sense_robot(), which is the
        # Maze API in this project -- confirm if the maze implementation changes.
        return self.maze.sense_robot()

    def create_Qtable_line(self, state):
        """
        Ensure the Q-table has a row for `state`.

        The Q-table is a two-level dict: Qtable[state] = {'u': q, 'd': q, ...}.
        If Qtable[state] already exists it is left untouched (setdefault).
        """
        self.Qtable.setdefault(state, {a: 0.0 for a in self.valid_actions})

    def choose_action(self):
        """
        Return an action: epsilon-greedy while learning, greedy while
        testing, uniformly random otherwise.
        """
        def is_random_exploration():
            # Explore with probability epsilon.
            return random.random() < self.epsilon

        def best_action():
            # Action with the highest Q-value for the current state
            # (ties broken by insertion order of valid_actions).
            row = self.Qtable[self.state]
            return max(row, key=row.get)

        if self.learning:
            if is_random_exploration():
                return random.choice(self.valid_actions)
            return best_action()
        elif self.testing:
            # BUG FIX: this branch and the one below were empty except for
            # comments in the original, which is a SyntaxError.
            return best_action()
        else:
            return random.choice(self.valid_actions)

    def update_Qtable(self, r, action, next_state):
        """
        Standard Q-learning update (only while learning):
        Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
        """
        if self.learning:
            target = r + self.gamma * max(self.Qtable[next_state].values())
            self.Qtable[self.state][action] += self.alpha * (
                target - self.Qtable[self.state][action])

    def update(self):
        """
        Describe the procedure what to do when update the robot.
        Called every time in every epoch in training or testing.
        Return current action and reward.
        """
        self.state = self.sense_state()              # Get the current state
        self.create_Qtable_line(self.state)          # For the state, create q table line

        action = self.choose_action()                # choose action for this state
        reward = self.maze.move_robot(action)        # move robot for given action

        next_state = self.sense_state()              # get next state
        self.create_Qtable_line(next_state)          # create q table line for next state

        if self.learning and not self.testing:
            self.update_Qtable(reward, action, next_state)  # update q table
            self.update_parameter()                         # update parameters

        return action, reward