PufferAI · xinpw8 · Oct 31, 2024 · Nov 2, 2024 · Nov 5, 2024 · Nov 6, 2024
diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml
@@ -0,0 +1,29 @@
+name: install
+on:
+  push:
+  pull_request:
+
+jobs:
+  test:
+    name: test ${{ matrix.py }} - ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - macos-latest
+        py:
+          - "3.11"
+          - "3.10"
+          - "3.9"
+    steps:
+      - name: Setup python for test ${{ matrix.py }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.py }}
+      - uses: actions/checkout@v3
+      - name: Upgrade pip
+        run: python -m pip install -U pip
+      - name: Install pufferlib
+        run: pip3 install -e . 
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,13 @@
+# Annoying temp files generated by Cython
+c_gae.c
+pufferlib/extensions.c
+pufferlib/ocean/grid/c_grid.c
+pufferlib/ocean/tactical/c_tactical.c
+pufferlib/puffernet.c
+
+# Raylib
+raylib_wasm/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,2 +1,6 @@
 global-include *.pyx
 global-include *.pxd
+global-include *.h
+global-include *.py
+recursive-include pufferlib/resources *
+
diff --git a/README.md b/README.md
@@ -1,39 +1,16 @@
 ![figure](https://pufferai.github.io/source/resource/header.png)
 
 [![PyPI version](https://badge.fury.io/py/pufferlib.svg)](https://badge.fury.io/py/pufferlib)
+![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pufferlib)
+![Github Actions](https://github.com/PufferAI/PufferLib/actions/workflows/install.yml/badge.svg)
 [![](https://dcbadge.vercel.app/api/server/spT4huaGYV?style=plastic)](https://discord.gg/spT4huaGYV)
 [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40jsuarez5341)](https://twitter.com/jsuarez5341)
 
-You have an environment, a PyTorch model, and a reinforcement learning framework that are designed to work together but don’t. PufferLib is a wrapper layer that makes RL on complex game environments as simple as RL on Atari. You write a native PyTorch network and a short binding for your environment; PufferLib takes care of the rest.
+PufferLib is the reinforcement learning library I wish existed during my PhD. It started as a compatibility layer to make working with complex environments a breeze. Now, it's a high-performance toolkit for research and industry with optimized parallel simulation, environments that run and train at 1M+ steps/second, and tons of quality of life improvements for practitioners. All our tools are free and open source. We also offer priority service for companies, startups, and labs!
 
-All of our [Documentation](https://pufferai.github.io "PufferLib Documentation") is hosted by github.io. @jsuarez5341 on [Discord](https://discord.gg/spT4huaGYV) for support -- post here before opening issues. I am also looking for contributors interested in adding bindings for other environments and RL frameworks.
+![Trailer](https://github.com/PufferAI/puffer.ai/blob/main/docs/assets/puffer_2.gif?raw=true)
 
-## Demo
-
-The current `demo.py` is a souped-up version of CleanRL PPO with optimized LSTM support, detailed performance metrics, a local dashboard, async envpool sampling, checkpointing, wandb sweeps, and more. It has a powerful `--help` that generates options based on the specified environment and policy. Hyperparams are in `config.yaml`. A few examples:
-
-```
-# Train minigrid with multiprocessing. Save it as a baseline.
-python demo.py --env minigrid --mode train --vec multiprocessing
-```
-
-![figure](https://raw.githubusercontent.com/PufferAI/pufferai.github.io/1.0/docs/source/resource/puffer-dash.png)
-
-```
-# Load the current minigrid baseline and render it locally
-python demo.py --env minigrid --mode eval --baseline
-
-# Train squared with serial vectorization and save it as a wandb baseline
-# The, load the current squared baseline and render it locally
-python demo.py --env squared --mode train --baseline
-python demo.py --env squared --mode eval --baseline
-
-# Render NMMO locally with a random policy
-python demo.py --env nmmo --mode eval
-
-# Autotune vectorization settings for your machine
-python demo.py --env breakout --mode autotune
-```
+All of our documentation is hosted at [puffer.ai](https://puffer.ai "PufferLib Documentation"). Reach out to @jsuarez5341 on [Discord](https://discord.gg/puffer) for support -- post there before opening issues. We're always looking for new contributors, too!
 
 ## Star to puff up the project!
 

diff --git a/clean_pufferl.py b/clean_pufferl.py
@@ -126,21 +126,6 @@ def evaluate(data):
             data.vecenv.send(actions)
 
     with profile.eval_misc:
-        # Moves into models... maybe. Definitely moves.
-        # You could also just return infos and have it in demo
-        if 'pokemon_exploration_map' in infos:
-            for pmap in infos['pokemon_exploration_map']:
-                if not hasattr(data, 'pokemon_map'):
-                    import pokemon_red_eval
-                    data.map_updater = pokemon_red_eval.map_updater()
-                    data.pokemon_map = pmap
-
-                data.pokemon_map = np.maximum(data.pokemon_map, pmap)
-
-            if len(infos['pokemon_exploration_map']) > 0:
-                rendered = data.map_updater(data.pokemon_map)
-                data.stats['Media/exploration_map'] = data.wandb.Image(rendered)
-
         for k, v in infos.items():
             if '_map' in k and data.wandb is not None:
                 data.stats[f'Media/{k}'] = data.wandb.Image(v[0])
@@ -703,7 +688,7 @@ def print_dashboard(env_name, utilization, global_step, epoch,
     table.add_column(justify="center", width=13)
     table.add_column(justify="right", width=13)
     table.add_row(
-        f':blowfish: {c1}PufferLib {b2}1.0.0',
+        f':blowfish: {c1}PufferLib {b2}2.0.0',
         f'{c1}CPU: {c3}{cpu_percent:.1f}%',
         f'{c1}GPU: {c3}{gpu_percent:.1f}%',
         f'{c1}DRAM: {c3}{dram_percent:.1f}%',

diff --git a/config/ocean/pysquared.ini b/config/ocean/pysquared.ini
@@ -0,0 +1,21 @@
+[base]
+package = ocean
+env_name = puffer_pysquared
+policy_name = Policy
+rnn_name = Recurrent
+
+[env]
+num_envs = 1
+
+[train]
+total_timesteps = 40_000_000
+checkpoint_interval = 50
+num_envs = 12288
+num_workers = 12
+env_batch_size = 4096
+batch_size = 131072
+update_epochs = 1
+minibatch_size = 8192
+learning_rate = 0.0017
+anneal_lr = False
+device = cuda
diff --git a/config/ocean/trash_pickup.ini b/config/ocean/trash_pickup.ini
@@ -0,0 +1,64 @@
+[base]
+package = ocean
+env_name = trash_pickup puffer_trash_pickup 
+policy_name = TrashPickup
+rnn_name = Recurrent
+
+[env]
+num_envs = 1024  # Recommended starting value: 4096 / num_agents
+grid_size = 10
+num_agents = 4
+num_trash = 20
+num_bins = 1
+max_steps = 150
+report_interval = 32
+agent_sight_range = 5 # only used with 2D local crop obs space
+
+[train]
+total_timesteps = 100_000_000
+checkpoint_interval = 200
+num_envs = 2
+num_workers = 2
+env_batch_size = 1
+batch_size = 131072
+update_epochs = 1
+minibatch_size = 16384
+bptt_horizon = 8
+anneal_lr = False
+device = cuda
+learning_rate=0.001
+gamma = 0.95
+gae_lambda = 0.85
+vf_coef = 0.4
+clip_coef = 0.1
+vf_clip_coef = 0.1
+ent_coef = 0.01
+
+[sweep.metric]
+goal = maximize
+name = environment/episode_return
+
+[sweep.parameters.train.parameters.learning_rate]
+distribution = log_uniform_values
+min = 0.000001
+max = 0.01
+
+[sweep.parameters.train.parameters.gamma]
+distribution = uniform
+min = 0
+max = 1
+
+[sweep.parameters.train.parameters.gae_lambda]
+distribution = uniform
+min = 0
+max = 1
+
+[sweep.parameters.train.parameters.update_epochs]
+distribution = int_uniform
+min = 1
+max = 4
+
+[sweep.parameters.train.parameters.ent_coef]
+distribution = log_uniform_values
+min = 1e-5
+max = 1e-1
diff --git a/pokemon_red_eval.py b/pokemon_red_eval.py