diff --git a/cartpole.py b/cartpole.py
index d16f116..45a3949 100644
--- a/cartpole.py
+++ b/cartpole.py
@@ -34,7 +34,7 @@ def __init__(self, observation_space, action_space):
         self.model.add(Dense(24, input_shape=(observation_space,), activation="relu"))
         self.model.add(Dense(24, activation="relu"))
         self.model.add(Dense(self.action_space, activation="linear"))
-        self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
+        self.model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))

     def remember(self, state, action, reward, next_state, done):
         self.memory.append((state, action, reward, next_state, done))
@@ -61,7 +61,7 @@ def experience_replay(self):


 def cartpole():
-    env = gym.make(ENV_NAME)
+    env = gym.make(ENV_NAME)  # render_mode='human'
     score_logger = ScoreLogger(ENV_NAME)
     observation_space = env.observation_space.shape[0]
     action_space = env.action_space.n
@@ -69,20 +69,20 @@ def cartpole():
     run = 0
     while True:
         run += 1
-        state = env.reset()
+        state, info = env.reset()
         state = np.reshape(state, [1, observation_space])
         step = 0
         while True:
             step += 1
             #env.render()
             action = dqn_solver.act(state)
-            state_next, reward, terminal, info = env.step(action)
+            state_next, reward, terminal, truncated, info = env.step(action)
             reward = reward if not terminal else -reward
             state_next = np.reshape(state_next, [1, observation_space])
             dqn_solver.remember(state, action, reward, state_next, terminal)
             state = state_next
-            if terminal:
-                print "Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step)
+            if terminal or truncated:
+                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                 score_logger.add_score(step, run)
                 break
             dqn_solver.experience_replay()
diff --git a/scores/score_logger.py b/scores/score_logger.py
index 8538bc7..cfae865 100644
--- a/scores/score_logger.py
+++ b/scores/score_logger.py
@@ -38,10 +38,10 @@ def add_score(self, score, run):
                        show_legend=True)
         self.scores.append(score)
         mean_score = mean(self.scores)
-        print "Scores: (min: " + str(min(self.scores)) + ", avg: " + str(mean_score) + ", max: " + str(max(self.scores)) + ")\n"
+        print("Scores: (min: " + str(min(self.scores)) + ", avg: " + str(mean_score) + ", max: " + str(max(self.scores)) + ")\n")
         if mean_score >= AVERAGE_SCORE_TO_SOLVE and len(self.scores) >= CONSECUTIVE_RUNS_TO_SOLVE:
             solve_score = run-CONSECUTIVE_RUNS_TO_SOLVE
-            print "Solved in " + str(solve_score) + " runs, " + str(run) + " total runs."
+            print("Solved in " + str(solve_score) + " runs, " + str(run) + " total runs.")
             self._save_csv(SOLVED_CSV_PATH, solve_score)
             self._save_png(input_path=SOLVED_CSV_PATH,
                            output_path=SOLVED_PNG_PATH,
@@ -58,10 +58,12 @@ def _save_png(self, input_path, output_path, x_label, y_label, average_of_n_last
         y = []
         with open(input_path, "r") as scores:
             reader = csv.reader(scores)
-            data = list(reader)
-            for i in range(0, len(data)):
-                x.append(int(i))
-                y.append(int(data[i][0]))
+            i = 0
+            for row in reader:
+                if row:
+                    x.append(int(i))
+                    y.append(int(row[0]))
+                    i += 1

         plt.subplots()
         plt.plot(x, y, label="score per run")
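
Note on the new-style Gym API adopted above (gym >= 0.26 / Gymnasium): reset() now
returns (observation, info) and step() returns five values, with episodes ending
either by termination or by truncation (CartPole-v1 truncates at 500 steps). A
minimal sketch of the migrated episode loop, using a random policy as a stand-in
for DQNSolver purely for illustration:

    import gym
    import numpy as np

    env = gym.make("CartPole-v1")  # pass render_mode="human" to visualize
    observation_space = env.observation_space.shape[0]

    state, info = env.reset()               # reset() now returns (obs, info)
    state = np.reshape(state, [1, observation_space])
    done = False
    while not done:
        action = env.action_space.sample()  # stand-in for dqn_solver.act(state)
        # step() now returns five values; the episode ends on either flag
        state_next, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        state = np.reshape(state_next, [1, observation_space])
    env.close()

Folding truncated into the exit condition matters: a CartPole-v1 run that reaches
the 500-step time limit sets truncated (not terminated), so breaking on terminal
alone would leave the inner loop stepping a finished environment.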