Florian Pineau DQN #47

Open · wants to merge 1 commit into master
34 changes: 29 additions & 5 deletions RandomBird/FlappyAgent.py
@@ -1,9 +1,33 @@
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

def FlappyPolicy(state, screen):
    action = None
    if np.random.randint(0, 2) < 1:
        action = 119
    return action
dqn = Sequential()
# 1st layer
dqn.add(Dense(units=500, kernel_initializer='lecun_uniform', activation="relu", input_dim = 8))
# output layer
dqn.add(Dense(units=2, kernel_initializer='lecun_uniform', activation="linear"))

dqn.compile(loss='mse', optimizer=optimizers.Adam(1e-4))
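
# Note: the weights file loaded below appears to be produced by the offline training
# script RandomBird/q_learning.py (which saves checkpoints with the "dqn_3_" prefix).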

dqn.load_weights("dqn_3_1.dqf")

batchSize = 256
actions = [None, 119]
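# Index 0 = do nothing (None), index 1 = flap (pygame key code 119, the 'w' key).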

def process_state(state):
    """ Return the game state as a flat list of 8 features """
    return [state['player_y'], state['player_vel'],
            state['next_pipe_dist_to_player'], state['next_pipe_top_y'], state['next_pipe_bottom_y'],
            state['next_next_pipe_dist_to_player'], state['next_next_pipe_top_y'], state['next_next_pipe_bottom_y']]

def greedy_action(network, state_x):
    """ Return the index of the best possible action """
    Q = network.predict(np.array(state_x).reshape(1, len(state_x)), batch_size=batchSize)
    return np.argmax(Q)

def FlappyPolicy(state, screen):
    state = process_state(state)
    action = greedy_action(dqn, state)
    return actions[action]
Binary file added RandomBird/dqn_3_1.dqf
Binary file not shown.
150 changes: 150 additions & 0 deletions RandomBird/q_learning.py
@@ -0,0 +1,150 @@
"""--------------------------------"""
""" Initialisation de Flappy Bird """
"""--------------------------------"""

from ple.games.flappybird import FlappyBird
from ple import PLE

# Available actions: do nothing (None) or flap (119)
actions = [None, 119]

game = FlappyBird(graphics="fixed")
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)
p.init()
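# force_fps=True runs the game as fast as possible instead of in real time, which
# speeds up training; display_screen=True still renders the game window.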


"""----------------------------"""
""" Création du Deep Q-Network """
"""----------------------------"""

from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

dqn = Sequential()
# 1st layer
dqn.add(Dense(units=500, kernel_initializer='lecun_uniform', activation="relu", input_dim = 8))
# output layer
dqn.add(Dense(units=2, kernel_initializer='lecun_uniform', activation="linear"))

dqn.compile(loss='mse', optimizer=optimizers.Adam(1e-4))
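# A single hidden layer of 500 ReLU units maps the 8 state features to one linear
# Q-value per action; the network is trained with an MSE loss and Adam (lr = 1e-4).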

dqn.load_weights("dqn_2_1.dqf")
#dqn.load_weights("dqn_0_3.dqf")
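# Training resumes from a previously saved checkpoint rather than from random weights.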

"""-----------------------------------------"""
""" Définition de quelques fonctions utiles """
"""-----------------------------------------"""

import numpy as np

def process_state(state):
    """ Return the game state as a flat list of 8 features """
    return [state['player_y'], state['player_vel'],
            state['next_pipe_dist_to_player'], state['next_pipe_top_y'], state['next_pipe_bottom_y'],
            state['next_next_pipe_dist_to_player'], state['next_next_pipe_top_y'], state['next_next_pipe_bottom_y']]

def epsilon(step):
    """ Exploration rate used for epsilon-greedy action selection """
    #if step < 1e6:
    #    return 1. - step * 9e-7
    #return .1
    return 0.01
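# Exploration is kept constant at 1% here; the commented-out lines correspond to a
# schedule decaying linearly from 1.0 to 0.1 over the first 1e6 steps.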

def clip_reward(r):
    """ Reshape the raw reward value """
    rr = 0
    if r > 0:
        rr = 1
    if r < 0:
        rr = -1000
    return rr
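# Reward shaping: positive rewards (a pipe passed) are clipped to +1, while dying
# costs -1000, so crashes dominate the learning signal.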

def greedy_action(network, state_x):
    """ Return the index of the best possible action """
    Q = network.predict(np.array(state_x).reshape(1, len(state_x)), batch_size=batchSize)
    return np.argmax(Q)

def MCeval(network, games, gamma):
    """ Monte Carlo evaluation of the network: mean score over several greedy games """
    scores = np.zeros(games)
    for i in range(games):
        p.reset_game()
        state_x = process_state(game.getGameState())
        step = -1
        while not game.game_over():
            step += 1
            action = greedy_action(network, state_x)
            reward = p.act(actions[action])
            state_y = process_state(game.getGameState())
            scores[i] = scores[i] + reward
            state_x = state_y
    return np.mean(scores)
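# Note: the gamma argument is unused inside MCeval; the returned score is the
# undiscounted mean of the raw (unclipped) rewards over the evaluation games.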


"""----------------------"""
""" Apprentissage du DQN """
"""----------------------"""

# Useful variables
total_games = 10000
gamma = 0.99
step = 0
batchSize = 256
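# There is no experience replay buffer: each transition is fitted individually as soon
# as it is observed; batchSize is only passed to the predict calls as batch_size.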

# Evaluation schedule
evaluation_period = 300
nb_epochs = total_games // evaluation_period
epoch = -1
scoreMC = np.zeros(nb_epochs)

# Prefix used when saving the neural network weights
filename = "dqn_3_"


"""-----------------"""
""" Deep Q-Learning """
"""-----------------"""

for id_game in range(total_games):
    if id_game % evaluation_period == 0:
        epoch += 1
        scoreMC[epoch] = MCeval(dqn, 50, gamma)
        dqn.save(filename + str(epoch) + ".dqf")
        print(">>> Eval n°%d | score = %f" % (epoch, scoreMC[epoch]))
    p.reset_game()  # Start a new game
    state_x = process_state(game.getGameState())
    id_frame = 0
    score = 0
    alea = 0
    while not game.game_over():
        id_frame += 1
        step += 1
        ## Choose the action to play: 0 or 1
        if np.random.rand() < epsilon(step):  # Random action (exploration)
            alea += 1
            action = np.random.choice([0, 1])
        else:  # Best action according to the network (exploitation)
            action = greedy_action(dqn, state_x)
        ## Play the chosen action and observe the reward and the next state
        reward = p.act(actions[action])
        reward = clip_reward(reward)
        state_y = process_state(game.getGameState())
        ## Q-value update: Bellman target y[a] = r + gamma * max_a' Q(s', a'),
        ## reduced to y[a] = r on terminal transitions
        QX = dqn.predict(np.array(state_x).reshape(1, len(state_x)), batch_size=batchSize)
        y = np.zeros(2)
        y[:] = QX[0]
        if not game.game_over():
            score += reward
            QY = dqn.predict(np.array(state_y).reshape(1, len(state_y)), batch_size=batchSize)
            QYmax = np.max(QY)
            update = reward + gamma * QYmax
        else:
            update = reward
        y[action] = update
        dqn.fit(np.array(state_x).reshape(1, len(state_x)), np.array(y).reshape(1, len(y)), epochs=3, verbose=0)
        state_x = state_y
    print(">>> game n°%d | score = %d | nb_steps = %d | %% random = %f%%" % (id_game, score, id_frame, alea / id_frame * 100))

for i in range(nb_epochs):
    print("epoch n°%d | score = %f" % (i, scoreMC[i]))