Fixup setup.py + add pre-commit #142

Open · wants to merge 4 commits into base: 2.0
7 changes: 7 additions & 0 deletions .gitignore
@@ -139,3 +139,10 @@ checkpoints/
experiments/
wandb/
raylib/

c_gae.c
Comment from the PR author:
These files are all generated when you run `pip install --editable .` -- it seems like they should be in the .gitignore (a sketch of how they get generated follows this file's diff).

pufferlib/extensions.c
pufferlib/ocean/grid/c_grid.c
pufferlib/ocean/tactical/c_tactical.c
pufferlib/puffernet.c
raylib_wasm/
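For context on the comment above, here is a minimal sketch of how a file like c_gae.c typically gets produced: Cython translates c_gae.pyx to C when setup.py builds the extension modules during an editable install, so the .c file shows up as an untracked build artifact. This is an illustrative sketch under assumptions (extension name, include dirs), not the repo's actual setup.py.

```python
# Sketch (not the actual pufferlib setup.py): building a Cython extension like
# c_gae emits c_gae.c as a generated artifact during `pip install --editable .`.
import numpy as np
from setuptools import Extension, setup
from Cython.Build import cythonize

setup(
    ext_modules=cythonize([
        # cythonize() translates c_gae.pyx -> c_gae.c, then compiles it
        Extension('c_gae', ['c_gae.pyx'], include_dirs=[np.get_include()]),
    ]),
)
```

Since the .c output is reproducible from the .pyx source, ignoring it (as this PR does) keeps generated code out of version control.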
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,13 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
Comment from the PR author:
TODO: Should we add pre-commit to requirements.txt or something similar? I'm not sure how this repo handles dev dependencies; it's a bit confusing (one possible approach is sketched below).

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files

# TODO: Add ruff + ruff format for nice linting and consistency
Comment from the PR author:
Add pre-commit to make it easier to contribute to the repo. TODO: standardize how to install this -- should we add a Makefile?
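One possible answer to the dev-dependency and installation questions above (a hedged sketch, not part of this PR's diff): expose an optional `dev` extra in setup.py so pre-commit comes in with the editable install. The extra's name and contents are assumptions, not the repo's current convention.

```python
# Sketch (not the actual pufferlib setup.py): an optional [dev] extra for
# contributor-only tooling such as pre-commit.
from setuptools import find_packages, setup

setup(
    name='pufferlib',
    packages=find_packages(),
    extras_require={
        # Installed only with: pip install --editable .[dev]
        'dev': ['pre-commit'],
    },
)
```

Contributors would then run `pip install --editable .[dev]` followed by `pre-commit install`, which could also be wrapped in a Makefile target if the repo adopts one.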

1 change: 0 additions & 1 deletion MANIFEST.in
@@ -3,4 +3,3 @@ global-include *.pxd
global-include *.h
global-include *.py
recursive-include pufferlib/resources *

4 changes: 4 additions & 0 deletions README.md
@@ -19,3 +19,7 @@ All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib D
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=pufferai/pufferlib&type=Date" />
</picture>
</a>

## Contributions

We're always looking for new contributors! When first starting out, don't forget to run `pre-commit install` before committing.
Comment from the PR author:
The README could still benefit from more details and context; it's bumpy coming in as a new user, but this is a nice start.

2 changes: 0 additions & 2 deletions c_gae.pyx
@@ -30,5 +30,3 @@ def compute_gae(cnp.ndarray dones, cnp.ndarray values,
c_advantages[t_cur] = lastgaelam

return advantages


4 changes: 2 additions & 2 deletions clean_pufferl.py
@@ -448,7 +448,7 @@ def store(self, obs, value, action, logprob, reward, done, env_id, mask):
ptr = self.ptr
indices = torch.where(mask)[0].numpy()[:self.batch_size - ptr]
end = ptr + len(indices)

self.obs[ptr:end] = obs.to(self.obs.device)[indices]
self.values_np[ptr:end] = value.cpu().numpy()[indices]
self.actions_np[ptr:end] = action[indices]
@@ -694,7 +694,7 @@ def print_dashboard(env_name, utilization, global_step, epoch,
f'{c1}DRAM: {c3}{dram_percent:.1f}%',
f'{c1}VRAM: {c3}{vram_percent:.1f}%',
)

s = Table(box=None, expand=True)
s.add_column(f"{c1}Summary", justify='left', vertical='top', width=16)
s.add_column(f"{c1}Value", justify='right', vertical='top', width=8)
2 changes: 1 addition & 1 deletion config/atari/default.ini
@@ -31,7 +31,7 @@ max = 10
#distribution = uniform
#min = 0
#max = 1

[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 5_000_000
2 changes: 1 addition & 1 deletion config/atari/enduro.ini
@@ -1,6 +1,6 @@
[base]
package = atari
env_name = enduro
env_name = enduro

[train]
total_timesteps = 16_657_125
2 changes: 0 additions & 2 deletions config/gpudrive.ini
@@ -42,5 +42,3 @@ max = 524288
distribution = uniform
min = 2048
max = 32768


2 changes: 1 addition & 1 deletion config/mujoco.ini
@@ -35,4 +35,4 @@ batch_size = 2048
minibatch_size = 32
bptt_horizon = 1
compile = False
compile_mode = reduce-overhead
compile_mode = reduce-overhead
2 changes: 1 addition & 1 deletion config/ocean/enduro.ini
@@ -1,6 +1,6 @@
[base]
package = ocean
env_name = puffer_enduro
env_name = puffer_enduro
policy_name = Policy
rnn_name = Recurrent

2 changes: 1 addition & 1 deletion config/ocean/moba.ini
@@ -51,7 +51,7 @@ max = 0.05
distribution = uniform
min = 0.0
max = 5.0

[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 200_000_000
1 change: 0 additions & 1 deletion config/ocean/nmmo3.ini
@@ -58,4 +58,3 @@ max = 0.0
distribution = uniform
min = 1_000_000_000
max = 10_000_000_000

2 changes: 1 addition & 1 deletion config/ocean/pong.ini
@@ -1,6 +1,6 @@
[base]
package = ocean
env_name = puffer_pong
env_name = puffer_pong
policy_name = Policy
rnn_name = Recurrent

3 changes: 0 additions & 3 deletions config/ocean/rware.ini
@@ -26,6 +26,3 @@ gae_lambda = 0.8297991396183212
vf_coef = 0.3974834958825928
clip_coef = 0.1
vf_clip_coef = 0.1



4 changes: 2 additions & 2 deletions config/ocean/trash_pickup.ini
@@ -1,11 +1,11 @@
[base]
package = ocean
env_name = trash_pickup puffer_trash_pickup
env_name = trash_pickup puffer_trash_pickup
policy_name = TrashPickup
rnn_name = Recurrent

[env]
num_envs = 1024 # Recommended: 4096 (recommended start value) / num_agents
num_envs = 1024 # Recommended: 4096 (recommended start value) / num_agents
grid_size = 10
num_agents = 4
num_trash = 20
4 changes: 2 additions & 2 deletions demo.py
@@ -20,7 +20,7 @@
signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0))

import clean_pufferl

def make_policy(env, policy_cls, rnn_cls, args):
policy = policy_cls(env, **args['policy'])
if rnn_cls is not None:
@@ -426,7 +426,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb,

make_env = env_module.env_creator(env_name)
policy_cls = getattr(env_module.torch, args['base']['policy_name'])

rnn_name = args['base']['rnn_name']
rnn_cls = None
if rnn_name is not None:
3 changes: 1 addition & 2 deletions evaluate_elos.py
@@ -111,7 +111,7 @@ def calc_elo(checkpoint, checkpoint_dir, elos, num_envs=128, num_games=128, num_
paths.remove(f'{checkpoint_dir}/{checkpoint}')
print(f'Removed {checkpoint} from paths')
elos[checkpoint] = 1000

# Sample with replacement if not enough models
print(f'Sampling {num_opponents} opponents')
n_models = len(paths)
@@ -171,4 +171,3 @@ def calc_elo(checkpoint, checkpoint_dir, elos, num_envs=128, num_games=128, num_
checkpoint = 'model_0.pt'
elos = {'model_random.pt': 1000}
calc_elo(checkpoint, checkpoint_dir, elos, num_games=16)

18 changes: 9 additions & 9 deletions pufferlib/emulation.py
@@ -158,7 +158,7 @@ def __init__(self, env=None, env_creator=None, env_args=[], env_kwargs={}, buf=N
self.obs_struct = self.observations
else:
self.obs_struct = self.observations.view(self.obs_dtype)

@property
def render_mode(self):
return self.env.render_mode
@@ -184,9 +184,9 @@ def reset(self, seed=None):
self.terminals[0] = False
self.truncations[0] = False
self.masks[0] = True

return self.observations, info

def step(self, action):
'''Execute an action and return (observation, reward, done, info)'''
if not self.initialized:
@@ -218,7 +218,7 @@ def step(self, action):
self.terminals[0] = done
self.truncations[0] = truncated
self.masks[0] = True

self.done = done or truncated
return self.observations, reward, done, truncated, info

@@ -381,7 +381,7 @@ def step(self, actions):
self.masks[i] = False
continue

ob = obs[agent]
ob = obs[agent]
self.mask[agent] = True
if self.is_obs_emulated:
emulate(self.obs_struct[i], ob)
@@ -392,7 +392,7 @@ def step(self, actions):
self.terminals[i] = dones[agent]
self.truncations[i] = truncateds[agent]
self.masks[i] = True

self.all_done = all(dones.values()) or all(truncateds.values())
rewards = pad_agent_data(rewards, self.possible_agents, 0)
dones = pad_agent_data(dones, self.possible_agents, True) # You changed this from false to match api test... is this correct?
@@ -408,7 +408,7 @@ def close(self):
def pad_agent_data(data, agents, pad_value):
return {agent: data[agent] if agent in data else pad_value
for agent in agents}

def make_object(object_instance=None, object_creator=None, creator_args=[], creator_kwargs={}):
if (object_instance is None) == (object_creator is None):
raise ValueError('Exactly one of object_instance or object_creator must be provided')
@@ -421,7 +421,7 @@ def make_object(object_instance=None, object_creator=None, creator_args=[], crea
if object_creator is not None:
if not callable(object_creator):
raise TypeError('object_creator must be a callable')

if creator_args is None:
creator_args = []

@@ -440,7 +440,7 @@ def check_space(data, space):
if not contains:
raise exceptions.APIUsageError(
f'Data:\n{data}\n not in space:\n{space}')

return True

def _seed_and_reset(env, seed):
2 changes: 1 addition & 1 deletion pufferlib/environments/atari/environment.py
@@ -38,7 +38,7 @@ def make(name, obs_type='grayscale', frameskip=4,
render_mode=ale_render_mode)

action_set = env._action_set

if render_mode != 'human':
env = pufferlib.postprocess.ResizeObservation(env, downscale=2)

1 change: 0 additions & 1 deletion pufferlib/environments/classic_control/environment.py
@@ -37,4 +37,3 @@ def step(self, action):
obs, reward, terminated, truncated, info = self.env.step(action)
reward = abs(obs[0]+0.5)
return obs, reward, terminated, truncated, info

@@ -24,4 +24,3 @@ def step(self, action):
obs, reward, terminated, truncated, info = self.env.step(action)
reward = abs(obs[0]+0.5)
return obs, reward, terminated, truncated, info

2 changes: 1 addition & 1 deletion pufferlib/environments/gpudrive/environment.py
@@ -80,7 +80,7 @@ def close(self):
'''There is no point in closing the env because
Madrona doesn't close correctly anyways. You will want
to cache this copy for later use. Cuda errors if you don't'''
pass
pass
#self.env.close()
#del self.env.sim

2 changes: 1 addition & 1 deletion pufferlib/environments/gpudrive/torch.py
@@ -31,7 +31,7 @@ def unpack_obs(obs_flat):
# Find the ends of each section
ro_end_idx = PARTNER_DIM * ROADMAP_AGENT_FEAT_DIM
rg_end_idx = ro_end_idx + (ROAD_MAP_DIM * TOP_K_ROADPOINTS)

# Unflatten and reshape to (batch_size, num_objects, object_dim)
road_objects = (vis_state[:, :ro_end_idx]).reshape(
-1, ROADMAP_AGENT_FEAT_DIM, PARTNER_DIM
2 changes: 1 addition & 1 deletion pufferlib/environments/griddly/environment.py
@@ -11,7 +11,7 @@

ALIASES = {
'spiders': 'GDY-Spiders-v0',
}
}

def env_creator(name='spiders'):
return functools.partial(make, name)
1 change: 0 additions & 1 deletion pufferlib/environments/gvgai/environment.py
@@ -25,4 +25,3 @@ def make(name, obs_type='grayscale', frameskip=4, full_action_space=False,
env = pufferlib.postprocess.EpisodeStats(env)
env = pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
return env

2 changes: 1 addition & 1 deletion pufferlib/environments/magent/environment.py
@@ -18,7 +18,7 @@ def make(name, buf=None):
env_cls = battle_v4.env
else:
raise ValueError(f'Unknown environment name {name}')

env = env_cls()
env = aec_to_parallel_wrapper(env)
env = pufferlib.wrappers.PettingZooTruncatedWrapper(env)
2 changes: 1 addition & 1 deletion pufferlib/environments/magent/torch.py
@@ -7,7 +7,7 @@ class Policy(pufferlib.models.Policy):
'''Based off of the DQN policy in MAgent'''
def __init__(self, env, hidden_size=256, output_size=256, kernel_num=32):
'''The CleanRL default Atari policy: a stack of three convolutions followed by a linear layer

Takes framestack as a mandatory keyword arguments. Suggested default is 1 frame
with LSTM or 4 frames without.'''
super().__init__(env)
2 changes: 1 addition & 1 deletion pufferlib/environments/microrts/environment.py
@@ -14,7 +14,7 @@ def env_creator(name='GlobalAgentCombinedRewardEnv'):

def make(name, buf=None):
'''Gym MicroRTS creation function

This library appears broken. Step crashes in Java.
'''
pufferlib.environments.try_import('gym_microrts')
1 change: 0 additions & 1 deletion pufferlib/environments/minihack/environment.py
@@ -59,4 +59,3 @@ def render(self):
chars = nle.nethack.tty_render(
self.obs['tty_chars'], self.obs['tty_colors'], self.obs['tty_cursor'])
return chars

2 changes: 1 addition & 1 deletion pufferlib/environments/mujoco/__init__.py
@@ -10,4 +10,4 @@
try:
from .policy import Recurrent
except:
Recurrent = None
Recurrent = None
2 changes: 1 addition & 1 deletion pufferlib/environments/mujoco/cleanrl.py
@@ -120,7 +120,7 @@
policy = CleanRLPolicy(envs)
elif args.policy == "puffer":
policy = Policy(envs)

agent = pufferlib.cleanrl.Policy(policy).to(device)
optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)

2 changes: 1 addition & 1 deletion pufferlib/environments/nethack/wrapper.py
@@ -171,7 +171,7 @@ def __init__(
self.observation_space = gym.spaces.Box(
low=0, high=255, shape=self.chw_image_shape, dtype=np.uint8
)

'''
obs_spaces = {
"screen_image": gym.spaces.Box(
4 changes: 1 addition & 3 deletions pufferlib/environments/nmmo/environment.py
@@ -26,7 +26,7 @@ class NMMOWrapper(pufferlib.postprocess.PettingZooWrapper):
@property
def render_mode(self):
return 'rgb_array'

def render(self):
'''Quick little renderer for NMMO'''
tiles = self.env.tile_map[:, :, 2].astype(np.uint8)
@@ -73,5 +73,3 @@ def step(self, actions):

def close(self):
return self.env.close()


2 changes: 1 addition & 1 deletion pufferlib/environments/nmmo/torch.py
@@ -52,7 +52,7 @@ def encode_observations(self, env_outputs):
tile = env_outputs['Tile']
# Center on player
# This is cursed without clone??
tile[:, :, :2] -= tile[:, 112:113, :2].clone()
tile[:, :, :2] -= tile[:, 112:113, :2].clone()
tile[:, :, :2] += 7
tile = self.embedding(
tile.long().clip(0, 255) + self.tile_offset.to(tile.device)
3 changes: 1 addition & 2 deletions pufferlib/environments/open_spiel/environment.py
@@ -40,7 +40,7 @@ def make(
min_simulations=int(min_simulations),
max_simulations=int(max_simulations),
)

if multiplayer:
env = OpenSpielPettingZooEnvironment(**kwargs)
wrapper_cls = pufferlib.emulation.PettingZooPufferEnv
@@ -53,4 +53,3 @@ def make(
postprocessor_cls=pufferlib.emulation.BasicPostprocessor,
buf=buf,
)
