diff --git a/config/default.ini b/config/default.ini index 6c41c811..6c923e0f 100644 --- a/config/default.ini +++ b/config/default.ini @@ -1,6 +1,7 @@ [base] package = None env_name = None +vec = native policy_name = Policy rnn_name = None max_suggestion_cost = 3600 diff --git a/config/ocean/connect4.ini b/config/ocean/connect4.ini index 64964e82..2ad624b7 100644 --- a/config/ocean/connect4.ini +++ b/config/ocean/connect4.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_connect4 +vec = multiprocessing policy_name = Policy rnn_name = Recurrent diff --git a/config/ocean/go.ini b/config/ocean/go.ini index 9790355f..a6397b0b 100644 --- a/config/ocean/go.ini +++ b/config/ocean/go.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_go +vec = multiprocessing policy_name = Go rnn_name = Recurrent diff --git a/config/ocean/grid.ini b/config/ocean/grid.ini index 95ec3f1f..0137d7a3 100644 --- a/config/ocean/grid.ini +++ b/config/ocean/grid.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_grid +vec = multiprocessing policy_name = Policy rnn_name = Recurrent diff --git a/config/ocean/moba.ini b/config/ocean/moba.ini index eaccece5..463739dd 100644 --- a/config/ocean/moba.ini +++ b/config/ocean/moba.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_moba +vec = multiprocessing policy_name = MOBA rnn_name = Recurrent diff --git a/config/ocean/nmmo3.ini b/config/ocean/nmmo3.ini index 3b4d8329..19b7c751 100644 --- a/config/ocean/nmmo3.ini +++ b/config/ocean/nmmo3.ini @@ -1,6 +1,7 @@ [base] package = ocean -env_name = nmmo3 +env_name = puffer_nmmo3 +vec = multiprocessing policy_name = NMMO3 rnn_name = NMMO3LSTM diff --git a/config/ocean/snake.ini b/config/ocean/snake.ini index 4954254a..182acee6 100644 --- a/config/ocean/snake.ini +++ b/config/ocean/snake.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_snake +vec = multiprocessing rnn_name = Recurrent [env] diff --git a/config/ocean/trash_pickup.ini b/config/ocean/trash_pickup.ini index 9a07defa..c22eea6d 100644 --- a/config/ocean/trash_pickup.ini +++ b/config/ocean/trash_pickup.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = trash_pickup puffer_trash_pickup +vec = multiprocessing policy_name = TrashPickup rnn_name = Recurrent diff --git a/demo.py b/demo.py index fbe93994..6a7192c0 100644 --- a/demo.py +++ b/demo.py @@ -199,13 +199,13 @@ def carbs_param(group, name, space, wandb_params, mmin=None, mmax=None, is_wandb_logging_enabled=False, resample_frequency=5, num_random_samples=len(param_spaces), - max_suggestion_cost=args['base']['max_suggestion_cost'], + max_suggestion_cost=args['max_suggestion_cost'], is_saved_on_every_observation=False, ) carbs = CARBS(carbs_params, param_spaces) # GPUDrive doesn't let you reinit the vecenv, so we have to cache it - cache_vecenv = args['base']['env_name'] == 'gpudrive' + cache_vecenv = args['env_name'] == 'gpudrive' elos = {'model_random.pt': 1000} vecenv = {'vecenv': None} # can't reassign otherwise @@ -293,7 +293,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, elif args['vec'] == 'native': vec = pufferlib.environment.PufferEnv else: - raise ValueError(f'Invalid --vector (serial/multiprocessing/ray/native).') + raise ValueError(f'Invalid --vec (serial/multiprocessing/ray/native).') if vecenv is None: vecenv = pufferlib.vector.make( @@ -360,8 +360,6 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, default='puffer_squared', help='Name of specific environment to run') parser.add_argument('--mode', type=str, default='train', choices='train eval evaluate sweep sweep-carbs autotune profile'.split()) - parser.add_argument('--vec', '--vector', '--vectorization', type=str, - default='native', choices=['serial', 'multiprocessing', 'ray', 'native']) parser.add_argument('--vec-overwork', action='store_true', help='Allow vectorization to use >1 worker/core. Not recommended.') parser.add_argument('--eval-model-path', type=str, default=None, @@ -377,6 +375,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, parser.add_argument('--wandb-group', type=str, default='debug') args = parser.parse_known_args()[0] + file_paths = glob.glob('config/**/*.ini', recursive=True) for path in file_paths: p = configparser.ConfigParser() @@ -394,7 +393,10 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, for section in p.sections(): for key in p[section]: - argparse_key = f'--{section}.{key}'.replace('_', '-') + if section == 'base': + argparse_key = f'--{key}'.replace('_', '-') + else: + argparse_key = f'--{section}.{key}'.replace('_', '-') parser.add_argument(argparse_key, default=p[section][key]) # Late add help so you get a dynamic menu based on the env @@ -416,7 +418,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, except: prev[subkey] = value - package = args['base']['package'] + package = args['package'] module_name = f'pufferlib.environments.{package}' if package == 'ocean': module_name = 'pufferlib.ocean' @@ -425,12 +427,12 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, env_module = importlib.import_module(module_name) make_env = env_module.env_creator(env_name) - policy_cls = getattr(env_module.torch, args['base']['policy_name']) + policy_cls = getattr(env_module.torch, args['policy_name']) - rnn_name = args['base']['rnn_name'] + rnn_name = args['rnn_name'] rnn_cls = None if rnn_name is not None: - rnn_cls = getattr(env_module.torch, args['base']['rnn_name']) + rnn_cls = getattr(env_module.torch, args['rnn_name']) if args['baseline']: assert args['mode'] in ('train', 'eval', 'evaluate') diff --git a/pufferlib/ocean/breakout/breakout.h b/pufferlib/ocean/breakout/breakout.h index 76fb81dc..96e45fb8 100644 --- a/pufferlib/ocean/breakout/breakout.h +++ b/pufferlib/ocean/breakout/breakout.h @@ -437,7 +437,7 @@ void reset_round(Breakout* env) { env->ball_vx = 0.0; env->ball_vy = 0.0; } -void reset(Breakout* env) { +void c_reset(Breakout* env) { env->log = (Log){0}; env->score = 0; env->num_balls = 5; @@ -482,11 +482,11 @@ void step_frame(Breakout* env, int action) { env->dones[0] = 1; env->log.score = env->score; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); } } -void step(Breakout* env) { +void c_step(Breakout* env) { env->dones[0] = 0; env->log.episode_length += 1; env->rewards[0] = 0.0; @@ -523,7 +523,7 @@ Client* make_client(Breakout* env) { return client; } -void render(Client* client, Breakout* env) { +void c_render(Client* client, Breakout* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/breakout/cy_breakout.pyx b/pufferlib/ocean/breakout/cy_breakout.pyx index 484d9266..443fd896 100644 --- a/pufferlib/ocean/breakout/cy_breakout.pyx +++ b/pufferlib/ocean/breakout/cy_breakout.pyx @@ -55,9 +55,9 @@ cdef extern from "breakout.h": Client* make_client(Breakout* env) void close_client(Client* client) - void render(Client* client, Breakout* env) - void reset(Breakout* env) - void step(Breakout* env) + void c_render(Client* client, Breakout* env) + void c_reset(Breakout* env) + void c_step(Breakout* env) cdef class CyBreakout: cdef: @@ -103,12 +103,12 @@ cdef class CyBreakout: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef Breakout* env = &self.envs[0] @@ -119,7 +119,7 @@ cdef class CyBreakout: self.client = make_client(env) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/connect4/connect4.h b/pufferlib/ocean/connect4/connect4.h index 5fb2bd59..cbb0261b 100644 --- a/pufferlib/ocean/connect4/connect4.h +++ b/pufferlib/ocean/connect4/connect4.h @@ -276,7 +276,7 @@ void compute_observation(CConnect4* env) { } } -void reset(CConnect4* env) { +void c_reset(CConnect4* env) { env->log = (Log){0}; env->dones[0] = NOT_DONE; env->player_pieces = 0; @@ -294,13 +294,13 @@ void finish_game(CConnect4* env, float reward) { compute_observation(env); } -void step(CConnect4* env) { +void c_step(CConnect4* env) { env->log.episode_length += 1; env->rewards[0] = 0.0; if (env->dones[0] == DONE) { add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); return; } @@ -359,7 +359,7 @@ Client* make_client(int width, int height) { return client; } -void render(Client* client, CConnect4* env) { +void c_render(Client* client, CConnect4* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/connect4/cy_connect4.pyx b/pufferlib/ocean/connect4/cy_connect4.pyx index abca0fb3..6eb39590 100644 --- a/pufferlib/ocean/connect4/cy_connect4.pyx +++ b/pufferlib/ocean/connect4/cy_connect4.pyx @@ -36,9 +36,9 @@ cdef extern from "connect4.h": void free_cconnect4(CConnect4* env) Client* make_client(float width, float height) void close_client(Client* client) - void render(Client* client, CConnect4* env) - void reset(CConnect4* env) - void step(CConnect4* env) + void c_render(Client* client, CConnect4* env) + void c_reset(CConnect4* env) + void c_step(CConnect4* env) cdef class CyConnect4: cdef: @@ -75,12 +75,12 @@ cdef class CyConnect4: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CConnect4* env = &self.envs[0] @@ -91,7 +91,7 @@ cdef class CyConnect4: self.client = make_client(env.width, env.height) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/enduro/cy_enduro.pyx b/pufferlib/ocean/enduro/cy_enduro.pyx index 40e9ba6b..4baed1cf 100644 --- a/pufferlib/ocean/enduro/cy_enduro.pyx +++ b/pufferlib/ocean/enduro/cy_enduro.pyx @@ -52,7 +52,7 @@ cdef extern from "enduro.h": void free_logbuffer(LogBuffer* buffer) Log aggregate_and_clear(LogBuffer* logs) void init(Enduro* env, int seed, int env_index) - void reset(Enduro* env) + void c_reset(Enduro* env) void c_step(Enduro* env) void c_render(Client* client, Enduro* env) Client* make_client(Enduro* env) @@ -103,15 +103,15 @@ cdef class CyEnduro: self.envs[i].log_buffer = self.logs self.envs[i].obs_size = observations.shape[1] - if i % 100 == 0: - print(f"Initializing environment #{i} with seed {unique_seed}") + #if i % 100 == 0: + # print(f"Initializing environment #{i} with seed {unique_seed}") init(&self.envs[i], unique_seed, i) def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i diff --git a/pufferlib/ocean/enduro/enduro.h b/pufferlib/ocean/enduro/enduro.h index 7e442e18..6369413a 100644 --- a/pufferlib/ocean/enduro/enduro.h +++ b/pufferlib/ocean/enduro/enduro.h @@ -475,7 +475,7 @@ void allocate(Enduro* env); void init(Enduro* env, int seed, int env_index); void free_allocated(Enduro* env); void reset_round(Enduro* env); -void reset(Enduro* env); +void c_reset(Enduro* env); unsigned char check_collision(Enduro* env, Car* car); int get_player_lane(Enduro* env); float get_car_scale(float y); @@ -865,7 +865,7 @@ void reset_round(Enduro* env) { } // Reset all init vars; only called once after init -void reset(Enduro* env) { +void c_reset(Enduro* env) { // No random after first reset int reset_seed = (env->reset_count == 0) ? xorshift32(&env->rng_state) : 0; diff --git a/pufferlib/ocean/go/cy_go.pyx b/pufferlib/ocean/go/cy_go.pyx index 5b39e303..d8595c40 100644 --- a/pufferlib/ocean/go/cy_go.pyx +++ b/pufferlib/ocean/go/cy_go.pyx @@ -30,8 +30,6 @@ cdef extern from "go.h": int find(Group*) void union_groups(Group*, int, int) - - ctypedef struct CGo: float* observations int* actions @@ -68,12 +66,12 @@ cdef extern from "go.h": void init(CGo* env) void free_initialized(CGo* env) - void reset(CGo* env) - void step(CGo* env) + void c_reset(CGo* env) + void c_step(CGo* env) Client* make_client(float width, float height) void close_client(Client* client) - void render(Client* client, CGo* env) + void c_render(Client* client, CGo* env) cdef class CyGo: @@ -122,19 +120,19 @@ cdef class CyGo: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CGo* env = &self.envs[0] if self.client == NULL: self.client = make_client(env.width,env.height) - render(self.client, &self.envs[0]) + c_render(self.client, &self.envs[0]) def close(self): if self.client != NULL: @@ -144,4 +142,4 @@ cdef class CyGo: def log(self): cdef Log log = aggregate_and_clear(self.logs) - return log \ No newline at end of file + return log diff --git a/pufferlib/ocean/go/go.h b/pufferlib/ocean/go/go.h index ca58ad53..afd51f04 100644 --- a/pufferlib/ocean/go/go.h +++ b/pufferlib/ocean/go/go.h @@ -648,7 +648,7 @@ void enemy_greedy_easy(CGo* env){ enemy_random_move(env); } -void reset(CGo* env) { +void c_reset(CGo* env) { env->log = (Log){0}; env->dones[0] = 0; env->score = 0; @@ -687,10 +687,10 @@ void end_game(CGo* env){ env->log.games_played++; env->log.episode_return += env->rewards[0]; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); } -void step(CGo* env) { +void c_step(CGo* env) { env->log.episode_length += 1; env->rewards[0] = 0.0; int action = (int)env->actions[0]; @@ -767,7 +767,7 @@ Client* make_client(int width, int height) { return client; } -void render(Client* client, CGo* env) { +void c_render(Client* client, CGo* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } @@ -839,4 +839,4 @@ void render(Client* client, CGo* env) { void close_client(Client* client) { CloseWindow(); free(client); -} \ No newline at end of file +} diff --git a/pufferlib/ocean/moba/cy_moba.pyx b/pufferlib/ocean/moba/cy_moba.pyx index 13267bec..7f74b9d6 100644 --- a/pufferlib/ocean/moba/cy_moba.pyx +++ b/pufferlib/ocean/moba/cy_moba.pyx @@ -198,8 +198,8 @@ cdef extern from "moba.h": unsigned char* read_file(char* filename) - void reset(MOBA* env) - void step(MOBA* env) + void c_reset(MOBA* env) + void c_step(MOBA* env) void randomize_tower_hp(MOBA* env) cpdef entity_dtype(): @@ -267,12 +267,12 @@ cdef class CyMOBA: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self, int tick): if self.client == NULL: diff --git a/pufferlib/ocean/moba/moba.h b/pufferlib/ocean/moba/moba.h index 57717f9e..3243433c 100644 --- a/pufferlib/ocean/moba/moba.h +++ b/pufferlib/ocean/moba/moba.h @@ -1820,7 +1820,7 @@ MOBA* allocate_moba(MOBA* env) { return env; } -void reset(MOBA* env) { +void c_reset(MOBA* env) { //map->pids[:] = -1 //randomize_tower_hp(env); @@ -1890,7 +1890,7 @@ void reset(MOBA* env) { compute_observations(env); } -void step(MOBA* env) { +void c_step(MOBA* env) { for (int pid = 0; pid < NUM_ENTITIES; pid++) { Entity* entity = &env->entities[pid]; entity->target_pid = -1; @@ -1997,7 +1997,7 @@ void step(MOBA* env) { log.dire_carry = env->log[9]; add_log(env->log_buffer, &log); if (do_reset) { - reset(env); + c_reset(env); } } compute_observations(env); @@ -2261,7 +2261,7 @@ int render_game(GameRenderer* renderer, MOBA* env, int frame) { } } if (IsKeyDown(KEY_ESCAPE)) { - return 1; + exit(0); } if (HUMAN_CONTROL) { if (IsKeyDown(KEY_Q) || IsKeyPressed(KEY_Q)) { diff --git a/pufferlib/ocean/nmmo3/cy_nmmo3.pyx b/pufferlib/ocean/nmmo3/cy_nmmo3.pyx index 0c909194..65d03709 100644 --- a/pufferlib/ocean/nmmo3/cy_nmmo3.pyx +++ b/pufferlib/ocean/nmmo3/cy_nmmo3.pyx @@ -138,8 +138,8 @@ cdef extern from "nmmo3.h": int tick(Client* client, MMO* env, float delta) void init_mmo(MMO* env) - void reset(MMO* env, int seed) - void step(MMO* env) + void c_reset(MMO* env, int seed) + void c_step(MMO* env) cpdef entity_dtype(): '''Make a dummy entity to get the dtype''' @@ -226,13 +226,13 @@ cdef class Environment: cdef int i for i in range(self.num_envs): # TODO: Seed - reset(&self.envs[i], i+1) + c_reset(&self.envs[i], i+1) # Do I need to reset terrain here? def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def pids(self): ary = np.zeros((512, 512), dtype=np.intc) diff --git a/pufferlib/ocean/nmmo3/nmmo3.h b/pufferlib/ocean/nmmo3/nmmo3.h index c7794bc7..138ce1e1 100644 --- a/pufferlib/ocean/nmmo3/nmmo3.h +++ b/pufferlib/ocean/nmmo3/nmmo3.h @@ -1669,7 +1669,7 @@ void enemy_ai(MMO* env, int pid) { wander(env, pid); } -void reset(MMO* env, int seed) { +void c_reset(MMO* env, int seed) { srand(time(NULL)); env->tick = 0; @@ -1873,7 +1873,7 @@ void reset(MMO* env, int seed) { compute_all_obs(env); } -void step(MMO* env) { +void c_step(MMO* env) { env->tick += 1; int tick = env->tick; @@ -2576,7 +2576,7 @@ void close_client(Client* client) { UnloadRenderTexture(client->ui_buffer); for (int i = 0; i < NUM_PLAYER_TEXTURES; i++) { for (int element = 0; element < 5; element++) { - UnloadTexture(client->players[i][element]); + UnloadTexture(client->players[element][i]); } } UnloadFont(client->font); diff --git a/pufferlib/ocean/pong/cy_pong.pyx b/pufferlib/ocean/pong/cy_pong.pyx index 75c652ee..5a4eff86 100644 --- a/pufferlib/ocean/pong/cy_pong.pyx +++ b/pufferlib/ocean/pong/cy_pong.pyx @@ -53,12 +53,12 @@ cdef extern from "pong.h": ctypedef struct Client void init(Pong* env) - void reset(Pong* env) - void step(Pong* env) + void c_reset(Pong* env) + void c_step(Pong* env) Client* make_client(Pong* env) void close_client(Client* client) - void render(Client* client, Pong* env) + void c_render(Client* client, Pong* env) cdef class CyPong: cdef: @@ -113,12 +113,12 @@ cdef class CyPong: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef Pong* env = &self.envs[0] @@ -129,7 +129,7 @@ cdef class CyPong: self.client = make_client(env) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/pong/pong.h b/pufferlib/ocean/pong/pong.h index e08498af..5ac28692 100644 --- a/pufferlib/ocean/pong/pong.h +++ b/pufferlib/ocean/pong/pong.h @@ -147,7 +147,7 @@ void reset_round(Pong* env) { env->n_bounces = 0; } -void reset(Pong* env) { +void c_reset(Pong* env) { env->log = (Log){0}; reset_round(env); env->score_l = 0; @@ -155,7 +155,7 @@ void reset(Pong* env) { compute_observations(env); } -void step(Pong* env) { +void c_step(Pong* env) { env->tick += 1; env->log.episode_length += 1; env->rewards[0] = 0; @@ -213,7 +213,7 @@ void step(Pong* env) { if (env->score_r == env->max_score) { env->terminals[0] = 1; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); return; } else { reset_round(env); @@ -246,7 +246,7 @@ void step(Pong* env) { if (env->score_l == env->max_score) { env->terminals[0] = 1; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); return; } else { reset_round(env); @@ -302,7 +302,7 @@ void close_client(Client* client) { free(client); } -void render(Client* client, Pong* env) { +void c_render(Client* client, Pong* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/rware/cy_rware.pyx b/pufferlib/ocean/rware/cy_rware.pyx index b796929a..7364d125 100644 --- a/pufferlib/ocean/rware/cy_rware.pyx +++ b/pufferlib/ocean/rware/cy_rware.pyx @@ -51,9 +51,9 @@ cdef extern from "rware.h": Client* make_client(CRware* env) void close_client(Client* client) - void render(Client* client, CRware* env) - void reset(CRware* env) - void step(CRware* env) + void c_render(Client* client, CRware* env) + void c_reset(CRware* env) + void c_step(CRware* env) cdef class CyRware: cdef: @@ -96,12 +96,12 @@ cdef class CyRware: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CRware* env = &self.envs[0] @@ -112,7 +112,7 @@ cdef class CyRware: self.client = make_client(env) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/rware/rware.h b/pufferlib/ocean/rware/rware.h index 14539946..be73651b 100644 --- a/pufferlib/ocean/rware/rware.h +++ b/pufferlib/ocean/rware/rware.h @@ -405,7 +405,7 @@ void compute_observations(CRware* env) { } } -void reset(CRware* env) { +void c_reset(CRware* env) { env->dones[0] = 0; // set agents in center @@ -720,7 +720,7 @@ void process_tree_movements(CRware* env, MovementGraph* graph) { } } -void step(CRware* env) { +void c_step(CRware* env) { memset(env->rewards, 0, env->num_agents * sizeof(float)); MovementGraph* graph = env->movement_graph; for (int i = 0; i < env->num_agents; i++) { @@ -778,7 +778,7 @@ Client* make_client(CRware* env) { return client; } -void render(Client* client, CRware* env) { +void c_render(Client* client, CRware* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/snake/cy_snake.pyx b/pufferlib/ocean/snake/cy_snake.pyx index 0f0cd5a8..d7ddb5a6 100644 --- a/pufferlib/ocean/snake/cy_snake.pyx +++ b/pufferlib/ocean/snake/cy_snake.pyx @@ -47,12 +47,12 @@ cdef extern from "snake.h": void compute_observations(CSnake* env) void spawn_snake(CSnake* env, int snake_id) void spawn_food(CSnake* env) - void reset(CSnake* env) + void c_reset(CSnake* env) void step_snake(CSnake* env, int i) - void step(CSnake* env) + void c_step(CSnake* env) ctypedef struct Client Client* make_client(int cell_size, int width, int height) - void render(Client* client, CSnake* env) + void c_render(Client* client, CSnake* env) void close_client(Client* client) cdef class CySnake: @@ -100,19 +100,19 @@ cdef class CySnake: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self, cell_size=8): cdef CSnake* env = &self.envs[0] if self.client == NULL: self.client = make_client(cell_size, env.width, env.height) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/snake/snake.h b/pufferlib/ocean/snake/snake.h index 2c1d7474..48fa2509 100644 --- a/pufferlib/ocean/snake/snake.h +++ b/pufferlib/ocean/snake/snake.h @@ -194,7 +194,7 @@ void spawn_food(CSnake* env) { env->grid[idx] = FOOD; } -void reset(CSnake* env) { +void c_reset(CSnake* env) { env->window = 2*env->vision+1; env->obs_size = env->window*env->window; @@ -300,7 +300,7 @@ void step_snake(CSnake* env, int i) { env->grid[next_r*env->width + next_c] = env->snake_colors[i]; } -void step(CSnake* env){ +void c_step(CSnake* env){ for (int i = 0; i < env->num_snakes; i++) step_snake(env, i); @@ -342,7 +342,10 @@ void close_client(Client* client) { free(client); } -void render(Client* client, CSnake* env) { +void c_render(Client* client, CSnake* env) { + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } BeginDrawing(); ClearBackground(COLORS[0]); int sz = client->cell_size; diff --git a/pufferlib/ocean/squared/cy_squared.pyx b/pufferlib/ocean/squared/cy_squared.pyx index 249f0376..d90f0dd6 100644 --- a/pufferlib/ocean/squared/cy_squared.pyx +++ b/pufferlib/ocean/squared/cy_squared.pyx @@ -14,12 +14,11 @@ cdef extern from "squared.h": ctypedef struct Client - void reset(Squared* env) - void step(Squared* env) - + void c_reset(Squared* env) + void c_step(Squared* env) Client* make_client(Squared* env) void close_client(Client* client) - void render(Client* client, Squared* env) + void c_render(Client* client, Squared* env) cdef class CySquared: cdef: @@ -48,19 +47,19 @@ cdef class CySquared: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef Squared* env = &self.envs[0] if self.client == NULL: self.client = make_client(env) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/squared/squared.h b/pufferlib/ocean/squared/squared.h index 74a7de6f..77cf6f50 100644 --- a/pufferlib/ocean/squared/squared.h +++ b/pufferlib/ocean/squared/squared.h @@ -38,7 +38,7 @@ void free_allocated(Squared* env) { free(env->terminals); } -void reset(Squared* env) { +void c_reset(Squared* env) { memset(env->observations, 0, env->size*env->size*sizeof(unsigned char)); env->observations[env->size*env->size/2] = AGENT; env->r = env->size/2; @@ -51,7 +51,7 @@ void reset(Squared* env) { env->observations[target_idx] = TARGET; } -void step(Squared* env) { +void c_step(Squared* env) { int action = env->actions[0]; env->terminals[0] = 0; env->rewards[0] = 0; @@ -75,7 +75,7 @@ void step(Squared* env) { || env->c >= env->size) { env->terminals[0] = 1; env->rewards[0] = -1.0; - reset(env); + c_reset(env); return; } @@ -83,7 +83,7 @@ void step(Squared* env) { if (env->observations[pos] == TARGET) { env->terminals[0] = 1; env->rewards[0] = 1.0; - reset(env); + c_reset(env); return; } @@ -111,7 +111,7 @@ void close_client(Client* client) { free(client); } -void render(Client* client, Squared* env) { +void c_render(Client* client, Squared* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index e4d5c0cb..ce99d7dc 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -300,7 +300,6 @@ def decode_actions(self, flat_hidden, lookup, concat=None): class TrashPickup(nn.Module): def __init__(self, env, cnn_channels=32, hidden_size=128, **kwargs): super().__init__() - self.agent_sight_range = env.agent_sight_range self.network= nn.Sequential( pufferlib.pytorch.layer_init( nn.Conv2d(5, cnn_channels, 5, stride=3)), @@ -323,9 +322,7 @@ def forward(self, observations): return actions, value def encode_observations(self, observations): - crop_size = 2 * self.agent_sight_range + 1 - observations = observations.view(-1, 5, crop_size, crop_size).float() - #observations = observations.view(-1, crop_size, crop_size, 5).permute(0, 3, 1, 2).float() + observations = observations.view(-1, 5, 11, 11).float() return self.network(observations), None def decode_actions(self, flat_hidden, lookup, concat=None): diff --git a/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx b/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx index 19d25f81..caf4cacf 100644 --- a/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx +++ b/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx @@ -37,10 +37,9 @@ cdef extern from "trash_pickup.h": Client* make_client(CTrashPickupEnv* env) void close_client(Client* client) - void render(Client* client, CTrashPickupEnv* env) - - void reset(CTrashPickupEnv* env) - void step(CTrashPickupEnv* env) + void c_render(Client* client, CTrashPickupEnv* env) + void c_reset(CTrashPickupEnv* env) + void c_step(CTrashPickupEnv* env) cdef class CyTrashPickup: cdef: @@ -83,19 +82,19 @@ cdef class CyTrashPickup: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CTrashPickupEnv* env = &self.envs[0] if self.client == NULL: self.client = make_client(env) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/trash_pickup/trash_pickup.h b/pufferlib/ocean/trash_pickup/trash_pickup.h index aa460cc7..ba7c441a 100644 --- a/pufferlib/ocean/trash_pickup/trash_pickup.h +++ b/pufferlib/ocean/trash_pickup/trash_pickup.h @@ -167,8 +167,6 @@ void compute_observations(CTrashPickupEnv* env) { // Local crop version void compute_observations(CTrashPickupEnv* env) { int sight_range = env->agent_sight_range; - int num_cell_types = 4; // EMPTY, TRASH, BIN, AGENT - char* obs = env->observations; int obs_dim = 2*env->agent_sight_range + 1; @@ -354,7 +352,7 @@ bool is_episode_over(CTrashPickupEnv* env) { return true; } -void reset(CTrashPickupEnv* env) { +void c_reset(CTrashPickupEnv* env) { env->current_step = 0; env->total_episode_reward = 0; @@ -383,7 +381,7 @@ void initialize_env(CTrashPickupEnv* env) { env->entities = (Entity*)calloc(env->num_agents + env->num_bins + env->num_trash, sizeof(Entity)); env->total_num_obs = env->num_agents * ((((env->agent_sight_range * 2 + 1) * (env->agent_sight_range * 2 + 1)) * 5)); - reset(env); + c_reset(env); } void allocate(CTrashPickupEnv* env) { @@ -397,7 +395,7 @@ void allocate(CTrashPickupEnv* env) { initialize_env(env); } -void step(CTrashPickupEnv* env) { +void c_step(CTrashPickupEnv* env) { // Reset reward for each agent memset(env->rewards, 0, sizeof(float) * env->num_agents); memset(env->dones, 0, sizeof(unsigned char) * env->num_agents); @@ -427,7 +425,7 @@ void step(CTrashPickupEnv* env) { add_log(env->log_buffer, &log); - reset(env); + c_reset(env); } compute_observations(env); @@ -479,7 +477,11 @@ Client* make_client(CTrashPickupEnv* env) { } // Render the TrashPickup environment -void render(Client* client, CTrashPickupEnv* env) { +void c_render(Client* client, CTrashPickupEnv* env) { + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + BeginDrawing(); ClearBackground(PUFF_BACKGROUND); diff --git a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx b/pufferlib/ocean/tripletriad/cy_tripletriad.pyx index 903659c0..561f2352 100644 --- a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx +++ b/pufferlib/ocean/tripletriad/cy_tripletriad.pyx @@ -43,9 +43,9 @@ cdef extern from "tripletriad.h": Client* make_client(float width, float height) void close_client(Client* client) - void render(Client* client, CTripleTriad* env) - void reset(CTripleTriad* env) - void step(CTripleTriad* env) + void c_render(Client* client, CTripleTriad* env) + void c_reset(CTripleTriad* env) + void c_step(CTripleTriad* env) cdef class CyTripleTriad: cdef: @@ -82,19 +82,19 @@ cdef class CyTripleTriad: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CTripleTriad* env = &self.envs[0] if self.client == NULL: self.client = make_client(env.width, env.height) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/tripletriad/tripletriad.h b/pufferlib/ocean/tripletriad/tripletriad.h index 2fdc5fb8..48e0894f 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.h +++ b/pufferlib/ocean/tripletriad/tripletriad.h @@ -294,7 +294,7 @@ void compute_observations(CTripleTriad* env) { } } -void reset(CTripleTriad* env) { +void c_reset(CTripleTriad* env) { env->log = (Log){0}; env->game_over = 0; for(int i=0; i< 2; i++) { @@ -490,7 +490,7 @@ void check_card_conversions(CTripleTriad* env, int card_placement, int player) { } } -void step(CTripleTriad* env) { +void c_step(CTripleTriad* env) { env->log.episode_length += 1; env->rewards[0] = 0.0; int action = env->actions[0]; @@ -499,7 +499,7 @@ void step(CTripleTriad* env) { env->log.score = env->score[0]; add_log(env->log_buffer, &env->log); //printf("Log: %f, %f, %f\n", env->log.episode_return, env->log.episode_length, env->log.score); - reset(env); + c_reset(env); return; } // select a card if the card is in the range of 1-5 and the card is not placed @@ -575,7 +575,7 @@ Client* make_client(int width, int height) { return client; } -void render(Client* client, CTripleTriad* env) { +void c_render(Client* client, CTripleTriad* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/setup.py b/setup.py index 1094c7a8..dd9bc793 100644 --- a/setup.py +++ b/setup.py @@ -278,11 +278,23 @@ library_dirs=['raylib/lib'], libraries=["raylib"], runtime_library_dirs=["raylib/lib"], - extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than'],#, '-g'], - extra_link_args=[rpath_arg] - + extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than', '-fwrapv'],#, '-g'], + extra_link_args=[rpath_arg, '-Bsymbolic-functions', '-O2', '--enable-new-dtags', '-fwrapv'] ) for path in extension_paths] - + +# Prevent Conda from injecting garbage compile flags +from distutils.sysconfig import get_config_vars +cfg_vars = get_config_vars() +for key in ('CC', 'CXX', 'LDSHARED'): + if cfg_vars[key]: + cfg_vars[key] = cfg_vars[key].replace('-B /root/anaconda3/compiler_compat', '') + cfg_vars[key] = cfg_vars[key].replace('-pthread', '') + cfg_vars[key] = cfg_vars[key].replace('-fno-strict-overflow', '') + +for key, value in cfg_vars.items(): + if value and '-fno-strict-overflow' in str(value): + cfg_vars[key] = value.replace('-fno-strict-overflow', '') + setup( name="pufferlib", description="PufferAI Library" @@ -295,7 +307,7 @@ }, include_package_data=True, install_requires=[ - 'numpy==1.23.3', + 'numpy>=1.23.3', 'opencv-python==3.4.17.63', 'cython>=3.0.0', 'rich',