Fixup setup.py + add pre-commit #142

Open · wants to merge 4 commits into base: 2.0
7 changes: 7 additions & 0 deletions .gitignore
@@ -139,3 +139,10 @@ checkpoints/
experiments/
wandb/
raylib/

c_gae.c
Comment from the PR author:
These files are all generated when you run `pip install --editable .` -- it seems like they should be in the .gitignore (a sketch of how they get generated follows this file's diff).

pufferlib/extensions.c
pufferlib/ocean/grid/c_grid.c
pufferlib/ocean/tactical/c_tactical.c
pufferlib/puffernet.c
raylib_wasm/
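For context on the comment above, here is a minimal sketch of how a file like c_gae.c typically gets produced: Cython translates c_gae.pyx to C when setup.py builds the extension modules during an editable install, so the .c file shows up as an untracked build artifact. This is an illustrative sketch under assumptions (extension name, include dirs), not the repo's actual setup.py.

```python
# Sketch (not the actual pufferlib setup.py): building a Cython extension like
# c_gae emits c_gae.c as a generated artifact during `pip install --editable .`.
import numpy as np
from setuptools import Extension, setup
from Cython.Build import cythonize

setup(
    ext_modules=cythonize([
        # cythonize() translates c_gae.pyx -> c_gae.c, then compiles it
        Extension('c_gae', ['c_gae.pyx'], include_dirs=[np.get_include()]),
    ]),
)
```

Since the .c output is reproducible from the .pyx source, ignoring it (as this PR does) keeps generated code out of version control.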
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,13 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
Comment from the PR author:
TODO: Should we add pre-commit to requirements.txt or something similar? I'm not sure how this repo handles dev dependencies; it's a bit confusing (one possible approach is sketched below).

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files

# TODO: Add ruff + ruff format for nice linting and consistency
Comment from the PR author:
Add pre-commit to make it easier to contribute to the repo. TODO: standardize how to install this -- should we add a Makefile?
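One possible answer to the dev-dependency and installation questions above (a hedged sketch, not part of this PR's diff): expose an optional `dev` extra in setup.py so pre-commit comes in with the editable install. The extra's name and contents are assumptions, not the repo's current convention.

```python
# Sketch (not the actual pufferlib setup.py): an optional [dev] extra for
# contributor-only tooling such as pre-commit.
from setuptools import find_packages, setup

setup(
    name='pufferlib',
    packages=find_packages(),
    extras_require={
        # Installed only with: pip install --editable .[dev]
        'dev': ['pre-commit'],
    },
)
```

Contributors would then run `pip install --editable .[dev]` followed by `pre-commit install`, which could also be wrapped in a Makefile target if the repo adopts one.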

1 change: 0 additions & 1 deletion MANIFEST.in
@@ -3,4 +3,3 @@ global-include *.pxd
global-include *.h
global-include *.py
recursive-include pufferlib/resources *

4 changes: 4 additions & 0 deletions README.md
@@ -19,3 +19,7 @@ All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib D
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=pufferai/pufferlib&type=Date" />
</picture>
</a>

## Contributions

We're always looking for new contributors! When first starting out, don't forget to run `pre-commit install` before committing.
Comment from the PR author:
The README could still benefit from more details and context; it's bumpy coming in as a new user, but this is a nice start.

2 changes: 0 additions & 2 deletions c_gae.pyx
@@ -30,5 +30,3 @@ def compute_gae(cnp.ndarray dones, cnp.ndarray values,
c_advantages[t_cur] = lastgaelam

return advantages


4 changes: 2 additions & 2 deletions clean_pufferl.py
@@ -448,7 +448,7 @@ def store(self, obs, value, action, logprob, reward, done, env_id, mask):
ptr = self.ptr
indices = torch.where(mask)[0].numpy()[:self.batch_size - ptr]
end = ptr + len(indices)

self.obs[ptr:end] = obs.to(self.obs.device)[indices]
self.values_np[ptr:end] = value.cpu().numpy()[indices]
self.actions_np[ptr:end] = action[indices]
@@ -694,7 +694,7 @@ def print_dashboard(env_name, utilization, global_step, epoch,
f'{c1}DRAM: {c3}{dram_percent:.1f}%',
f'{c1}VRAM: {c3}{vram_percent:.1f}%',
)

s = Table(box=None, expand=True)
s.add_column(f"{c1}Summary", justify='left', vertical='top', width=16)
s.add_column(f"{c1}Value", justify='right', vertical='top', width=8)
2 changes: 1 addition & 1 deletion config/atari/default.ini
@@ -31,7 +31,7 @@ max = 10
#distribution = uniform
#min = 0
#max = 1

[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 5_000_000
2 changes: 1 addition & 1 deletion config/atari/enduro.ini
@@ -1,6 +1,6 @@
[base]
package = atari
env_name = enduro
env_name = enduro

[train]
total_timesteps = 16_657_125
2 changes: 0 additions & 2 deletions config/gpudrive.ini
@@ -42,5 +42,3 @@ max = 524288
distribution = uniform
min = 2048
max = 32768


2 changes: 1 addition & 1 deletion config/mujoco.ini
@@ -35,4 +35,4 @@ batch_size = 2048
minibatch_size = 32
bptt_horizon = 1
compile = False
compile_mode = reduce-overhead
compile_mode = reduce-overhead
2 changes: 1 addition & 1 deletion config/ocean/enduro.ini
@@ -1,6 +1,6 @@
[base]
package = ocean
env_name = puffer_enduro
env_name = puffer_enduro
policy_name = Policy
rnn_name = Recurrent

2 changes: 1 addition & 1 deletion config/ocean/moba.ini
@@ -51,7 +51,7 @@ max = 0.05
distribution = uniform
min = 0.0
max = 5.0

[sweep.parameters.train.parameters.total_timesteps]
distribution = uniform
min = 200_000_000
1 change: 0 additions & 1 deletion config/ocean/nmmo3.ini
@@ -58,4 +58,3 @@ max = 0.0
distribution = uniform
min = 1_000_000_000
max = 10_000_000_000

2 changes: 1 addition & 1 deletion config/ocean/pong.ini
@@ -1,6 +1,6 @@
[base]
package = ocean
env_name = puffer_pong
env_name = puffer_pong
policy_name = Policy
rnn_name = Recurrent

3 changes: 0 additions & 3 deletions config/ocean/rware.ini
@@ -26,6 +26,3 @@ gae_lambda = 0.8297991396183212
vf_coef = 0.3974834958825928
clip_coef = 0.1
vf_clip_coef = 0.1



4 changes: 2 additions & 2 deletions config/ocean/trash_pickup.ini
@@ -1,11 +1,11 @@
[base]
package = ocean
env_name = trash_pickup puffer_trash_pickup
env_name = trash_pickup puffer_trash_pickup
policy_name = TrashPickup
rnn_name = Recurrent

[env]
num_envs = 1024 # Recommended: 4096 (recommended start value) / num_agents
num_envs = 1024 # Recommended: 4096 (recommended start value) / num_agents
grid_size = 10
num_agents = 4
num_trash = 20
4 changes: 2 additions & 2 deletions demo.py
@@ -20,7 +20,7 @@
signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0))

import clean_pufferl

def make_policy(env, policy_cls, rnn_cls, args):
policy = policy_cls(env, **args['policy'])
if rnn_cls is not None:
@@ -426,7 +426,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb,

make_env = env_module.env_creator(env_name)
policy_cls = getattr(env_module.torch, args['base']['policy_name'])

rnn_name = args['base']['rnn_name']
rnn_cls = None
if rnn_name is not None:
3 changes: 1 addition & 2 deletions evaluate_elos.py
@@ -111,7 +111,7 @@ def calc_elo(checkpoint, checkpoint_dir, elos, num_envs=128, num_games=128, num_
paths.remove(f'{checkpoint_dir}/{checkpoint}')
print(f'Removed {checkpoint} from paths')
elos[checkpoint] = 1000

# Sample with replacement if not enough models
print(f'Sampling {num_opponents} opponents')
n_models = len(paths)
@@ -171,4 +171,3 @@ def calc_elo(checkpoint, checkpoint_dir, elos, num_envs=128, num_games=128, num_
checkpoint = 'model_0.pt'
elos = {'model_random.pt': 1000}
calc_elo(checkpoint, checkpoint_dir, elos, num_games=16)

18 changes: 9 additions & 9 deletions pufferlib/emulation.py
@@ -158,7 +158,7 @@ def __init__(self, env=None, env_creator=None, env_args=[], env_kwargs={}, buf=N
self.obs_struct = self.observations
else:
self.obs_struct = self.observations.view(self.obs_dtype)

@property
def render_mode(self):
return self.env.render_mode
@@ -184,9 +184,9 @@ def reset(self, seed=None):
self.terminals[0] = False
self.truncations[0] = False
self.masks[0] = True

return self.observations, info

def step(self, action):
'''Execute an action and return (observation, reward, done, info)'''
if not self.initialized:
@@ -218,7 +218,7 @@ def step(self, action):
self.terminals[0] = done
self.truncations[0] = truncated
self.masks[0] = True

self.done = done or truncated
return self.observations, reward, done, truncated, info

@@ -381,7 +381,7 @@ def step(self, actions):
self.masks[i] = False
continue

ob = obs[agent]
ob = obs[agent]
self.mask[agent] = True
if self.is_obs_emulated:
emulate(self.obs_struct[i], ob)
@@ -392,7 +392,7 @@ def step(self, actions):
self.terminals[i] = dones[agent]
self.truncations[i] = truncateds[agent]
self.masks[i] = True

self.all_done = all(dones.values()) or all(truncateds.values())
rewards = pad_agent_data(rewards, self.possible_agents, 0)
dones = pad_agent_data(dones, self.possible_agents, True) # You changed this from false to match api test... is this correct?
@@ -408,7 +408,7 @@ def close(self):
def pad_agent_data(data, agents, pad_value):
return {agent: data[agent] if agent in data else pad_value
for agent in agents}

def make_object(object_instance=None, object_creator=None, creator_args=[], creator_kwargs={}):
if (object_instance is None) == (object_creator is None):
raise ValueError('Exactly one of object_instance or object_creator must be provided')
@@ -421,7 +421,7 @@ def make_object(object_instance=None, object_creator=None, creator_args=[], crea
if object_creator is not None:
if not callable(object_creator):
raise TypeError('object_creator must be a callable')

if creator_args is None:
creator_args = []

@@ -440,7 +440,7 @@ def check_space(data, space):
if not contains:
raise exceptions.APIUsageError(
f'Data:\n{data}\n not in space:\n{space}')

return True

def _seed_and_reset(env, seed):
2 changes: 1 addition & 1 deletion pufferlib/environments/atari/environment.py
@@ -38,7 +38,7 @@ def make(name, obs_type='grayscale', frameskip=4,
render_mode=ale_render_mode)

action_set = env._action_set

if render_mode != 'human':
env = pufferlib.postprocess.ResizeObservation(env, downscale=2)

1 change: 0 additions & 1 deletion pufferlib/environments/classic_control/environment.py
@@ -37,4 +37,3 @@ def step(self, action):
obs, reward, terminated, truncated, info = self.env.step(action)
reward = abs(obs[0]+0.5)
return obs, reward, terminated, truncated, info

@@ -24,4 +24,3 @@ def step(self, action):
obs, reward, terminated, truncated, info = self.env.step(action)
reward = abs(obs[0]+0.5)
return obs, reward, terminated, truncated, info

2 changes: 1 addition & 1 deletion pufferlib/environments/gpudrive/environment.py
@@ -80,7 +80,7 @@ def close(self):
'''There is no point in closing the env because
Madrona doesn't close correctly anyways. You will want
to cache this copy for later use. Cuda errors if you don't'''
pass
pass
#self.env.close()
#del self.env.sim

2 changes: 1 addition & 1 deletion pufferlib/environments/gpudrive/torch.py
@@ -31,7 +31,7 @@ def unpack_obs(obs_flat):
# Find the ends of each section
ro_end_idx = PARTNER_DIM * ROADMAP_AGENT_FEAT_DIM
rg_end_idx = ro_end_idx + (ROAD_MAP_DIM * TOP_K_ROADPOINTS)

# Unflatten and reshape to (batch_size, num_objects, object_dim)
road_objects = (vis_state[:, :ro_end_idx]).reshape(
-1, ROADMAP_AGENT_FEAT_DIM, PARTNER_DIM
2 changes: 1 addition & 1 deletion pufferlib/environments/griddly/environment.py
@@ -11,7 +11,7 @@

ALIASES = {
'spiders': 'GDY-Spiders-v0',
}
}

def env_creator(name='spiders'):
return functools.partial(make, name)
1 change: 0 additions & 1 deletion pufferlib/environments/gvgai/environment.py
@@ -25,4 +25,3 @@ def make(name, obs_type='grayscale', frameskip=4, full_action_space=False,
env = pufferlib.postprocess.EpisodeStats(env)
env = pufferlib.emulation.GymnasiumPufferEnv(env=env, buf=buf)
return env

2 changes: 1 addition & 1 deletion pufferlib/environments/magent/environment.py
@@ -18,7 +18,7 @@ def make(name, buf=None):
env_cls = battle_v4.env
else:
raise ValueError(f'Unknown environment name {name}')

env = env_cls()
env = aec_to_parallel_wrapper(env)
env = pufferlib.wrappers.PettingZooTruncatedWrapper(env)
2 changes: 1 addition & 1 deletion pufferlib/environments/magent/torch.py
@@ -7,7 +7,7 @@ class Policy(pufferlib.models.Policy):
'''Based off of the DQN policy in MAgent'''
def __init__(self, env, hidden_size=256, output_size=256, kernel_num=32):
'''The CleanRL default Atari policy: a stack of three convolutions followed by a linear layer

Takes framestack as a mandatory keyword arguments. Suggested default is 1 frame
with LSTM or 4 frames without.'''
super().__init__(env)
2 changes: 1 addition & 1 deletion pufferlib/environments/microrts/environment.py
@@ -14,7 +14,7 @@ def env_creator(name='GlobalAgentCombinedRewardEnv'):

def make(name, buf=None):
'''Gym MicroRTS creation function

This library appears broken. Step crashes in Java.
'''
pufferlib.environments.try_import('gym_microrts')
1 change: 0 additions & 1 deletion pufferlib/environments/minihack/environment.py
@@ -59,4 +59,3 @@ def render(self):
chars = nle.nethack.tty_render(
self.obs['tty_chars'], self.obs['tty_colors'], self.obs['tty_cursor'])
return chars

2 changes: 1 addition & 1 deletion pufferlib/environments/mujoco/__init__.py
@@ -10,4 +10,4 @@
try:
from .policy import Recurrent
except:
Recurrent = None
Recurrent = None
2 changes: 1 addition & 1 deletion pufferlib/environments/mujoco/cleanrl.py
@@ -120,7 +120,7 @@
policy = CleanRLPolicy(envs)
elif args.policy == "puffer":
policy = Policy(envs)

agent = pufferlib.cleanrl.Policy(policy).to(device)
optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)

2 changes: 1 addition & 1 deletion pufferlib/environments/nethack/wrapper.py
@@ -171,7 +171,7 @@ def __init__(
self.observation_space = gym.spaces.Box(
low=0, high=255, shape=self.chw_image_shape, dtype=np.uint8
)

'''
obs_spaces = {
"screen_image": gym.spaces.Box(
4 changes: 1 addition & 3 deletions pufferlib/environments/nmmo/environment.py
@@ -26,7 +26,7 @@ class NMMOWrapper(pufferlib.postprocess.PettingZooWrapper):
@property
def render_mode(self):
return 'rgb_array'

def render(self):
'''Quick little renderer for NMMO'''
tiles = self.env.tile_map[:, :, 2].astype(np.uint8)
@@ -73,5 +73,3 @@ def step(self, actions):

def close(self):
return self.env.close()


2 changes: 1 addition & 1 deletion pufferlib/environments/nmmo/torch.py
@@ -52,7 +52,7 @@ def encode_observations(self, env_outputs):
tile = env_outputs['Tile']
# Center on player
# This is cursed without clone??
tile[:, :, :2] -= tile[:, 112:113, :2].clone()
tile[:, :, :2] -= tile[:, 112:113, :2].clone()
tile[:, :, :2] += 7
tile = self.embedding(
tile.long().clip(0, 255) + self.tile_offset.to(tile.device)
3 changes: 1 addition & 2 deletions pufferlib/environments/open_spiel/environment.py
@@ -40,7 +40,7 @@ def make(
min_simulations=int(min_simulations),
max_simulations=int(max_simulations),
)

if multiplayer:
env = OpenSpielPettingZooEnvironment(**kwargs)
wrapper_cls = pufferlib.emulation.PettingZooPufferEnv
@@ -53,4 +53,3 @@ def make(
postprocessor_cls=pufferlib.emulation.BasicPostprocessor,
buf=buf,
)
