update json and requirements.txt
lilianweng committed May 7, 2018
1 parent 3522c80 commit fab08fc
Showing 3 changed files with 17 additions and 15 deletions.
JSON config file:

@@ -1,5 +1,5 @@
 {
-    "env_name": "CartPole-v1",
+    "env_name": "MsPacman-ram-v0",
     "policy_name": "DqnPolicy",
     "policy_params": {
         "batch_size": 32,
@@ -8,11 +8,15 @@
         "epsilon": 1.0,
         "epsilon_final": 0.02,
         "layer_sizes": [
-            32,
-            32
+            128,
+            128
         ],
         "lr": 0.001,
         "model_type": "lstm",
+        "model_params": {
+            "lstm_layers": 1,
+            "lstm_size": 256
+        },
         "step_size": 16,
         "target_update_type": "hard"
     },
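For orientation, here is a minimal sketch of how a config like this is typically consumed: the entries under "policy_params" become keyword arguments of DqnPolicy, so "layer_sizes" now sets the fully-connected layer sizes directly while "model_params" carries only the LSTM-specific settings read in dqn.py below. The file path and the commented-out construction are hypothetical, not part of this commit.

    # Minimal sketch (not repo code): load the JSON config and pass policy_params
    # through as keyword arguments. The path below is hypothetical.
    import json

    with open('config/dqn-mspacman.json') as f:
        cfg = json.load(f)

    params = cfg['policy_params']
    print(params['layer_sizes'])    # [128, 128] -- dense layer sizes, now a top-level param
    print(params['model_params'])   # {'lstm_layers': 1, 'lstm_size': 256} -- LSTM-only settings

    # Hypothetical construction; the real entry point lives elsewhere in the repo:
    # policy = DqnPolicy(env, cfg['policy_name'], **params)
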
playground/policies/dqn.py: 21 changes (10 additions & 11 deletions)

@@ -24,9 +24,9 @@ def __init__(self, env, name,
                  batch_size=64,
                  memory_capacity=100000,
                  model_type='mlp',
-                 model_params=None,
                  step_size=1,  # only > 1 if model_type is 'lstm'.
                  layer_sizes=None,  # [64] by default.
+                 model_params=None,
                  target_update_type='hard',
                  target_update_params=None,
                  double_q=True,
@@ -52,7 +52,7 @@ def __init__(self, env, name,

         self.model_type = model_type
         self.model_params = model_params or {}
-        self.layer_sizes = layer_sizes or [64]
+        self.layer_sizes = layer_sizes or [32, 32]
         self.step_size = step_size
         self.double_q = double_q
         self.dueling = dueling
@@ -133,36 +133,35 @@ def create_q_networks(self):
         self.done_flags = tf.placeholder(tf.float32, shape=(None,), name='done')

         # The output is a probability distribution over all the actions.
-        layers_sizes = self.model_params.get('layer_sizes', [32, 32])

         net_class, net_params = self._extract_network_params()

         if self.dueling:
-            self.q_hidden = net_class(self.states, layers_sizes[:-1], name='Q_primary',
+            self.q_hidden = net_class(self.states, self.layer_sizes[:-1], name='Q_primary',
                                       **net_params)
-            self.adv = mlp_net(self.q_hidden, layers_sizes[-1:] + [self.act_size],
+            self.adv = mlp_net(self.q_hidden, self.layer_sizes[-1:] + [self.act_size],
                                name='Q_primary_adv')
-            self.v = mlp_net(self.q_hidden, layers_sizes[-1:] + [1], name='Q_primary_v')
+            self.v = mlp_net(self.q_hidden, self.layer_sizes[-1:] + [1], name='Q_primary_v')

             # Average Dueling
             self.q = self.v + (self.adv - tf.reduce_mean(
                 self.adv, reduction_indices=1, keep_dims=True))

-            self.q_target_hidden = net_class(self.states_next, layers_sizes[:-1], name='Q_target',
+            self.q_target_hidden = net_class(self.states_next, self.layer_sizes[:-1], name='Q_target',
                                              **net_params)
-            self.adv_target = mlp_net(self.q_target_hidden, layers_sizes[-1:] + [self.act_size],
+            self.adv_target = mlp_net(self.q_target_hidden, self.layer_sizes[-1:] + [self.act_size],
                                       name='Q_target_adv')
-            self.v_target = mlp_net(self.q_target_hidden, layers_sizes[-1:] + [1],
+            self.v_target = mlp_net(self.q_target_hidden, self.layer_sizes[-1:] + [1],
                                     name='Q_target_v')

             # Average Dueling
             self.q_target = self.v_target + (self.adv_target - tf.reduce_mean(
                 self.adv_target, reduction_indices=1, keep_dims=True))

         else:
-            self.q = net_class(self.states, layers_sizes + [self.act_size], name='Q_primary',
+            self.q = net_class(self.states, self.layer_sizes + [self.act_size], name='Q_primary',
                                **net_params)
-            self.q_target = net_class(self.states_next, layers_sizes + [self.act_size],
+            self.q_target = net_class(self.states_next, self.layer_sizes + [self.act_size],
                                       name='Q_target', **net_params)

         # The primary and target Q networks should match.
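The two "Average Dueling" blocks above implement Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)); subtracting the mean advantage removes the ambiguity between the value and advantage streams (tf.reduce_mean with reduction_indices=1, keep_dims=True is the TF 1.x spelling of axis=1, keepdims=True). A minimal numpy sketch of that aggregation, illustration only rather than repo code:

    # Illustration only: the "average dueling" aggregation from the diff above.
    import numpy as np

    v = np.array([[2.0]])                  # V(s), shape (batch, 1)
    adv = np.array([[0.5, -0.5, 1.0]])     # A(s, a), shape (batch, num_actions)

    q = v + (adv - adv.mean(axis=1, keepdims=True))
    print(q)  # ~[[2.167 1.167 2.667]] -- only relative advantages shift Q across actions

With self.layer_sizes = [128, 128] from the config, layer_sizes[:-1] sizes the shared hidden stack and layer_sizes[-1:] sizes the separate advantage and value heads built by mlp_net.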
requirements.txt: 1 change (0 additions & 1 deletion)

@@ -2,7 +2,6 @@ matplotlib==1.5.3
 setuptools==39.0.1
 numpy==1.14.0
 pandas==0.22.0
-gym.egg==info
 tensorflow==1.5.0
 click==6.7
 gym==0.10.5
