Add files via upload #48

Open · wants to merge 1 commit into base: master
22 changes: 22 additions & 0 deletions simonet/FlappyAgent.py
@@ -0,0 +1,22 @@
from state import new_state
import numpy as np
import pickle

opened = 0                      # flag: has the trained Q-table been loaded yet?
Q = np.zeros((18,30,21,2))      # Q-table over the discretised (x, y, v) state space, 2 actions

def FlappyPolicy(state, screen):

    global opened
    global Q

    # Load the trained Q-table once, on the first call
    if not opened:
        with open("Qtrained", 'rb') as file:
            Q = pickle.load(file)
        opened = 1

    # Discretise the raw game state and act greedily with respect to Q
    s = new_state(state)
    action = np.argmax(Q[s[0],s[1],s[2]][:])
    return action*119           # 0 = do nothing, 119 = flap (PLE keycode)
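
As a quick sanity check, here is a minimal sketch (assuming the Qtrained file produced by trainning.py sits in the working directory; the shape is taken from the code above) for loading and inspecting the pickled Q-table outside the game loop:

import pickle
import numpy as np

# Load the Q-table that trainning.py pickled to "Qtrained"
with open("Qtrained", "rb") as f:
    Q = pickle.load(f)

# The shape must match the discretised state space used by FlappyAgent.py
assert Q.shape == (18, 30, 21, 2)
print("visited (non-zero) entries:", np.count_nonzero(Q), "out of", Q.size)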


Binary file added simonet/Qtrained
Binary file not shown.
Binary file added simonet/__pycache__/FlappyAgent.cpython-36.pyc
Binary file not shown.
Binary file added simonet/__pycache__/state.cpython-36.pyc
Binary file not shown.
29 changes: 29 additions & 0 deletions simonet/run.py
@@ -0,0 +1,29 @@
# You're not allowed to change this file
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy

game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True)
# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()
reward = 0.0

nb_games = 100
cumulated = np.zeros((nb_games))

for i in range(nb_games):
    p.reset_game()

    while(not p.game_over()):
        state = game.getGameState()
        screen = p.getScreenRGB()
        action=FlappyPolicy(state, screen) ### Your job is to define this function.

        reward = p.act(action)
        cumulated[i] = cumulated[i] + reward

average_score = np.mean(cumulated)
max_score = np.max(cumulated)
7 changes: 7 additions & 0 deletions simonet/state.py
@@ -0,0 +1,7 @@
def new_state(state):

    # Discretise the raw PLE state into Q-table indices:
    #   x: horizontal distance to the next pipe, in 20 px buckets
    #   y: vertical offset to the bottom of the next pipe gap, in 20 px buckets (+15 so it stays >= 0)
    #   v: vertical velocity (+10 so it stays >= 0)
    x = int(round((state['next_pipe_dist_to_player'])/20))
    y = int(round((state['player_y'] - state['next_pipe_bottom_y'])/20))+15
    v = int(state['player_vel'])+10

    return [x,y,v]
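
As a worked example (the state values below are hypothetical, chosen only to illustrate the bucketing), the discretisation maps a raw PLE state dict to Q-table indices as follows:

from state import new_state

# Hypothetical raw state, for illustration only
example = {
    'next_pipe_dist_to_player': 140.0,  # round(140/20)            -> x = 7
    'player_y': 220.0,
    'next_pipe_bottom_y': 300.0,        # round((220-300)/20) + 15 -> y = 11
    'player_vel': -4.0,                 # int(-4) + 10             -> v = 6
}
print(new_state(example))  # [7, 11, 6]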
122 changes: 122 additions & 0 deletions simonet/trainning.py
@@ -0,0 +1,122 @@
import numpy as np
import pickle
from ple import PLE
from ple.games.flappybird import FlappyBird
from state import new_state

# Convert the argmax index (0 or 1) into the PLE action (0 = do nothing, 119 = flap)
def get_action(a):
    return a*119

# Epsilon-greedy action selection, with a heuristic exploration action:
# flap when the bird is less than 50 px above the bottom of the pipe gap
def epsilon_greedy(Q, new_state, epsilon, state):
    a = np.argmax(Q[new_state[0],new_state[1],new_state[2]])
    if np.random.rand() <= epsilon:
        if np.random.rand() <= 0.5 * epsilon:
            if state['next_pipe_bottom_y'] - state['player_y'] < 50:
                a = 1
            else:
                a = 0

    return a


# Parameters
gamma = 0.95
alpha = 0.9
epsilon = 0.1
nb_games = 15000

# Size of the discretised state space
X = 18
Y = 30
V = 21

# Init Q
Q = np.zeros((X,Y,V,2))
#file = open("Qtrained",'rb')
#Q = pickle.load(file)
#alpha = 0.1

# Create the game in accelerated mode (no display) for training
game = FlappyBird(graphics="fancy")
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False)

# Cumulative score over the last 100 / 1000 games
last_100 = 0
last_1000 = 0

# (one-off) measurement of the state-space bounds
#Xmax = 0
#Ymax = 0
#Vmax = 0

#file=open('Qtrained', 'rb')
#Q=marshal.load(file)
# For each game
for g in range(1, nb_games):

    # Start a new game
    p.init()
    p.reset_game()
    state = game.getGameState()
    reward = training_reward = 0
    s = new_state(state)
    action = epsilon_greedy(Q, s, epsilon, state)

    # (one-off) measurement of the state-space bounds
    #if s[0] > Xmax:
    #    Xmax = s[0]
    #if s[1] > Ymax:
    #    Ymax = s[1]
    #if s[2] > Vmax:
    #    Vmax = s[2]

    while not p.game_over():

        # Act in the environment
        reward = p.act(get_action(action))

        # Shape the training reward: heavy penalty on death, small bonus otherwise
        if reward == -5:
            training_reward = -100
        else:
            training_reward = 1

        # Next state and next action (on-policy)
        state_ = game.getGameState()
        s_ = new_state(state_)
        action_ = epsilon_greedy(Q, s_, epsilon, state_)

        # SARSA update of Q
        delta = (training_reward + gamma * Q[s_[0],s_[1],s_[2]][action_] - Q[s[0],s[1],s[2]][action])
        Q[s[0],s[1],s[2]][action] = Q[s[0],s[1],s[2]][action] + alpha * delta

        # Move on to the next state/action pair
        s = s_
        action = action_

        # Accumulate running scores (skip the terminal -5 reward)
        if reward != -5:
            last_100 += reward
            last_1000 += reward

    # Monitor progress during training and decay alpha
    if g % 100 == 0:
        print('Average over the last 100 games: %.2f' % (last_100/100))
        last_100 = 0
    if g % 1000 == 0:
        while alpha > 0.1:
            alpha /= 1.01
        print('Average over the last 1000 games: %.2f' % (last_1000/1000))
        if last_1000 / 1000 > 50:
            break
        last_1000 = 0


# Result of the state-space size measurement
#print(Xmax,Ymax,Vmax)

# Save the Q-table with pickle (marshal did not work)
with open('Qtrained', 'wb') as f:
pickle.dump(Q,f)
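
The inner-loop update above is the standard on-policy SARSA rule, Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a)). A minimal self-contained sketch of a single update step on a toy Q-table (all values below are hypothetical, for illustration only):

import numpy as np

alpha, gamma = 0.9, 0.95
Q = np.zeros((18, 30, 21, 2))   # same shape as in the scripts above

s, a = (3, 14, 10), 0           # current discretised state and action
s_, a_ = (2, 15, 9), 1          # next state and the action actually chosen there
r = 1                           # shaped training reward for surviving the step

# SARSA bootstraps on the action actually selected in the next state
delta = r + gamma * Q[s_][a_] - Q[s][a]
Q[s][a] += alpha * delta
print(Q[s][a])                  # 0.9 (approximately), since Q started at zero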