Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] 3D Ocean environment: CodeBall #139

Draft
wants to merge 43 commits into
base: 2.0
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
53a5ce3
Initial LM-generated file
neverix Dec 14, 2024
9f0ab27
More interesting robot dynamics
neverix Dec 14, 2024
c55db3b
Separate out header file
neverix Dec 14, 2024
f0857ee
Improve 2D rendering
neverix Dec 14, 2024
c309df6
3D rendering
neverix Dec 14, 2024
c44e771
Cube -> sphere
neverix Dec 14, 2024
d8a16aa
Write shaders. nothing works
neverix Dec 14, 2024
abe5a84
Maybe I should remove the render texture
neverix Dec 14, 2024
7775771
Position works (but not normals)
neverix Dec 14, 2024
5d58f36
Add fragment shader
neverix Dec 15, 2024
41ea81a
Factor out renderer
neverix Dec 15, 2024
2a2c11d
More refactoring, add Python wrappers
neverix Dec 15, 2024
7589965
Start cythonizing environment
neverix Dec 15, 2024
05a33bb
Finish Python bindings
neverix Dec 15, 2024
5beb54d
Get training to run
neverix Dec 15, 2024
4b2ad48
Add rewards
neverix Dec 15, 2024
b78dd19
Efficient Python<->C intercom
neverix Dec 15, 2024
4f6b26a
Optimize observations, make environment actually work
neverix Dec 15, 2024
17af960
Implement logging
neverix Dec 15, 2024
a916184
Final commit
neverix Dec 16, 2024
d2dd084
Fix C bugs, prepare to run checkpoints in main
neverix Dec 16, 2024
3d49de2
Fix more bugs for GPU training
neverix Dec 16, 2024
9b6b87a
Make training work for simple rewards, discover an architecture limit…
neverix Dec 17, 2024
758e56a
Add rendering (went overboard)
neverix Dec 17, 2024
79da029
Statically link shaders; test training more quickly; make agents maxi…
neverix Dec 18, 2024
da137c1
Merge branch 'PufferAI:2.0' into codeball
neverix Dec 21, 2024
78c6414
Experiments with reward
neverix Dec 21, 2024
763756d
Merge branch 'codeball' of https://github.com/neverix/PufferLib into …
neverix Dec 21, 2024
d7a28c2
Fix warnings
neverix Dec 21, 2024
e649a76
Improve hparams, make agent maximize reward somewhere
neverix Dec 22, 2024
c0b4453
Mess around with rewards
neverix Dec 23, 2024
5173b98
Generate better logs, rewards and observations. Switch to continuous …
neverix Dec 23, 2024
62c0cbd
Add dumb rock baseline
neverix Dec 23, 2024
791699d
Beat 0-opponent, fix memory leak
neverix Dec 23, 2024
31e2293
Merge branch '2.0' into codeball
neverix Dec 24, 2024
eaf3112
Make rewards and baselines configurable, try score-only training
neverix Dec 27, 2024
4f8fbf9
Render Nitro packs
neverix Dec 27, 2024
ac56711
Remove almost all copies
neverix Dec 27, 2024
433a219
Remove setup.py
neverix Dec 27, 2024
34748e3
Remove (the right) setup.py
neverix Dec 27, 2024
2413e02
Update .gitignore
neverix Dec 27, 2024
27cb7e8
Many code fixes
neverix Dec 27, 2024
e8f4666
Tune hyperparameters more
neverix Dec 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,13 @@ checkpoints/
experiments/
wandb/
raylib/

raylib_wasm/*
c_gae.c
pufferlib/puffernet.c
pufferlib/extensions.c
pufferlib/ocean/tactical/c_tactical.c
pufferlib/ocean/grid/c_grid.c
.idea/
*.dSYM/
/codeball
33 changes: 33 additions & 0 deletions codeball_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pufferlib.ocean.torch import MLPPolicy, Recurrent
from pufferlib.ocean.codeball.codeball import CodeBall
from tqdm import trange
import numpy as np
import torch
import os


# Roll out a saved policy in a single rendered CodeBall environment.
env = CodeBall(num_envs=1, n_robots=8, scripted_opponent_type="zero", frame_skip=5)
obs, _ = env.reset()


# pol = MLPPolicy(env)
# rnn = Recurrent(env, pol)
# Set `wp` to a checkpoint path to pin a specific model; when left as None the
# most recently modified checkpoint under experiments/ is used.
wp = None
if wp is None:
    from glob import glob
    checkpoints = glob("experiments/**/model_*.pt", recursive=True)
    if not checkpoints:
        # max() on an empty list would raise an opaque ValueError; fail with a
        # message that says what is actually missing.
        raise FileNotFoundError(
            "No checkpoint matching experiments/**/model_*.pt was found; "
            "train a model first or set `wp` to a checkpoint path."
        )
    wp = max(checkpoints, key=os.path.getmtime)
# NOTE(review): this unpickles a whole model object; only load checkpoints you
# produced yourself.
rnn = torch.load(wp, map_location='cpu')
rnn_state = None
torch.set_grad_enabled(False)
try:
    for _ in (bar := trange(10_000)):
        obs = torch.from_numpy(obs).float()
        actions, logprob, entropy, value, rnn_state = rnn(obs, rnn_state)
        # logits, value, rnn_state = rnn.policy(obs, rnn_state)
        # actions = logits
        actions = actions.numpy()
        obs, rewards, terminated, truncated, info = env.step(actions)
        # Show the value estimates averaged over rows after reshaping to two columns.
        bar.set_postfix(val=(value.reshape(-1, 2).mean(0).tolist()))
        if (terminated | truncated).any():
            # Drop the recurrent state whenever any episode ended.
            rnn_state = None
        env.render()
finally:
    # Release the environment even if the rollout raises or is interrupted.
    env.close()
70 changes: 70 additions & 0 deletions config/ocean/codeball.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
[base]
package = ocean
env_name = puffer_codeball
policy_name = Policy
; policy_name = MLPPolicy
rnn_name = Recurrent

[rnn]
; input_size = 256
; hidden_size = 256
; num_layers = 3

[env]
num_envs = 32
n_robots = 8
n_nitros = 4
frame_skip = 3
max_steps = 1500
scripted_opponent_type = zero
goal_scored_reward = 5.0
loiter_penalty = 0.0
ball_reward = 0.05

[train]
total_timesteps = 200_000_000
num_envs = 2
num_workers = 2
; env_batch_size = 1
batch_size = 65536
update_epochs = 1
; minibatch_size = 32768
bptt_horizon = 16
anneal_lr = False
; gae_lambda = 0.9776227170639571
; gamma = 0.8567482546637853
; clip_coef = 0.011102333784435113
; vf_coef = 0.3403069830175013
; vf_clip_coef = 0.26475190539131727
max_grad_norm = 0.8660179376602173
; ent_coef = 0.01376980586465873
learning_rate = 0.002064722899262613
; learning_rate = 0.0005978428084749276
minibatch_size = 4096
; bptt_horizon = 16
; anneal_lr = False

; gamma = 0.5
; gamma = 0.95
; gamma = 0.95
gamma = 0.98
; gamma = 0.0
; gae_lambda = 0.98
gae_lambda = 0.98

; gamma = 0.9257755108746066
; gae_lambda = 0.8783667470139129

; gamma = 0.98
; gae_lambda = 0.97
; ent_coef = 0.0012080029654114927
ent_coef = 0.0005
; max_grad_norm = 0.3808319568634033
vf_coef = 0.6
checkpoint_interval = 20
; device = cuda
device = cpu

[sweep.metric]
goal = maximize
name = environment/reward
2 changes: 2 additions & 0 deletions pufferlib/ocean/codeball/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
codeball
setup.py
38 changes: 38 additions & 0 deletions pufferlib/ocean/codeball/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Build rules for the CodeBall environment: a standalone native demo
# (`codeball`) and the Cython extension used from Python.
CFILES=codeball.c
HFILES=codeball.h renderer.h
# Compiler and linker flags share one variable. The framework list targets
# macOS. The current settings are a debug build with AddressSanitizer; an
# optimized alternative is: -O3 -march=native -funroll-loops
CFLAGS=\
	-lm -I../../../raylib/include -I../../../pufferlib -lpthread ../../../raylib/lib/libraylib.a \
	-O1 -g -fsanitize=address -fno-omit-frame-pointer \
	-framework Cocoa -framework OpenGL -framework IOKit -framework CoreVideo -framework GLUT -framework AppKit -lglfw -lobjc
SHADERS=base_vs.h fragment_fs.h

ARCH=$(shell uname -m)
PYTHON_VERSION=$(shell python -c 'import sys; print(str(sys.version_info.major) + str(sys.version_info.minor) + "-" + sys.platform)')
PYTHON_SO_POSTFIX=cpython-${PYTHON_VERSION}.so

# Default goal: build the native demo. Without this rule `clean` would be the
# first target in the file, so a bare `make` would delete build outputs.
all: codeball

clean:
	rm -f codeball *.so cy_codeball.c

# Shader sources are embedded into C headers as byte arrays.
base_vs.h: base.vs
	xxd -i base.vs > base_vs.h

fragment_fs.h: fragment.fs
	xxd -i fragment.fs > fragment_fs.h

codeball: $(CFILES) $(HFILES) Makefile $(SHADERS)
	gcc $(CFLAGS) $(CFILES) -o codeball

codeball.pxd: cy_codeball.pyx codeball.h
	autopxd codeball.h codeball.pxd

cy_codeball.${PYTHON_SO_POSTFIX}: $(CFILES) $(HFILES) $(SHADERS) cy_codeball.pyx codeball.pxd renderer.pxd setup.py
	python setup.py build_ext --inplace

run_py: cy_codeball.${PYTHON_SO_POSTFIX}
	python -m codeball

run: codeball
	./codeball

.PHONY: all clean run run_py
32 changes: 32 additions & 0 deletions pufferlib/ocean/codeball/base.vs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#version 330

// Per-vertex inputs
in vec3 vertexPosition;
in vec2 vertexTexCoord;
in vec3 vertexNormal;
in vec4 vertexColor;

// Matrices provided as uniforms
uniform mat4 mvp;
uniform mat4 matModel;
uniform mat4 matNormal;

// Outputs handed on to the fragment shader
out vec3 fragPosition;
out vec2 fragTexCoord;
out vec4 fragColor;
out vec3 fragNormal;

void main() {
    // Homogeneous form of the incoming vertex, shared by both transforms below
    vec4 localPosition = vec4(vertexPosition, 1.0);

    // Final vertex position
    gl_Position = mvp * localPosition;

    // Position after applying the model matrix
    fragPosition = (matModel * localPosition).xyz;

    // The normal is only re-normalized; the matNormal transform is not
    // applied (a variant that multiplied by matNormal is disabled).
    fragNormal = normalize(vertexNormal);

    // Texture coordinate and color pass straight through
    fragTexCoord = vertexTexCoord;
    fragColor = vertexColor;
}
70 changes: 70 additions & 0 deletions pufferlib/ocean/codeball/base_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
 * Embedded copy of the vertex shader source from base.vs, stored as the raw
 * ASCII bytes of the GLSL text. The Makefile in this directory produces this
 * file with `xxd -i base.vs > base_vs.h`; regenerate it instead of editing
 * the bytes by hand.
 * NOTE(review): the array has no trailing 0x00 byte, so a consumer that
 * treats it as a C string must rely on base_vs_len or add a terminator --
 * confirm against the code that loads it.
 */
unsigned char base_vs[] = {
0x23, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x33, 0x33, 0x30,
0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x76,
0x65, 0x72, 0x74, 0x65, 0x78, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62,
0x75, 0x74, 0x65, 0x73, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x33,
0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x50, 0x6f, 0x73, 0x69, 0x74,
0x69, 0x6f, 0x6e, 0x3b, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x32,
0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x54, 0x65, 0x78, 0x43, 0x6f,
0x6f, 0x72, 0x64, 0x3b, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x33,
0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x4e, 0x6f, 0x72, 0x6d, 0x61,
0x6c, 0x3b, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x34, 0x20, 0x76,
0x65, 0x72, 0x74, 0x65, 0x78, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0a,
0x0a, 0x2f, 0x2f, 0x20, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x75, 0x6e,
0x69, 0x66, 0x6f, 0x72, 0x6d, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73,
0x0a, 0x75, 0x6e, 0x69, 0x66, 0x6f, 0x72, 0x6d, 0x20, 0x6d, 0x61, 0x74,
0x34, 0x20, 0x6d, 0x76, 0x70, 0x3b, 0x0a, 0x75, 0x6e, 0x69, 0x66, 0x6f,
0x72, 0x6d, 0x20, 0x6d, 0x61, 0x74, 0x34, 0x20, 0x6d, 0x61, 0x74, 0x4d,
0x6f, 0x64, 0x65, 0x6c, 0x3b, 0x0a, 0x75, 0x6e, 0x69, 0x66, 0x6f, 0x72,
0x6d, 0x20, 0x6d, 0x61, 0x74, 0x34, 0x20, 0x6d, 0x61, 0x74, 0x4e, 0x6f,
0x72, 0x6d, 0x61, 0x6c, 0x3b, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x4f, 0x75,
0x74, 0x70, 0x75, 0x74, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x20,
0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x20, 0x28,
0x74, 0x6f, 0x20, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x20,
0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x29, 0x0a, 0x6f, 0x75, 0x74, 0x20,
0x76, 0x65, 0x63, 0x33, 0x20, 0x66, 0x72, 0x61, 0x67, 0x50, 0x6f, 0x73,
0x69, 0x74, 0x69, 0x6f, 0x6e, 0x3b, 0x0a, 0x6f, 0x75, 0x74, 0x20, 0x76,
0x65, 0x63, 0x32, 0x20, 0x66, 0x72, 0x61, 0x67, 0x54, 0x65, 0x78, 0x43,
0x6f, 0x6f, 0x72, 0x64, 0x3b, 0x0a, 0x6f, 0x75, 0x74, 0x20, 0x76, 0x65,
0x63, 0x34, 0x20, 0x66, 0x72, 0x61, 0x67, 0x43, 0x6f, 0x6c, 0x6f, 0x72,
0x3b, 0x0a, 0x6f, 0x75, 0x74, 0x20, 0x76, 0x65, 0x63, 0x33, 0x20, 0x66,
0x72, 0x61, 0x67, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x3b, 0x0a, 0x0a,
0x2f, 0x2f, 0x20, 0x4e, 0x4f, 0x54, 0x45, 0x3a, 0x20, 0x41, 0x64, 0x64,
0x20, 0x68, 0x65, 0x72, 0x65, 0x20, 0x79, 0x6f, 0x75, 0x72, 0x20, 0x63,
0x75, 0x73, 0x74, 0x6f, 0x6d, 0x20, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62,
0x6c, 0x65, 0x73, 0x0a, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x6d, 0x61,
0x69, 0x6e, 0x28, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f,
0x2f, 0x20, 0x53, 0x65, 0x6e, 0x64, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65,
0x78, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73,
0x20, 0x74, 0x6f, 0x20, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74,
0x20, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x66, 0x72, 0x61, 0x67, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e,
0x20, 0x3d, 0x20, 0x76, 0x65, 0x63, 0x33, 0x28, 0x6d, 0x61, 0x74, 0x4d,
0x6f, 0x64, 0x65, 0x6c, 0x20, 0x2a, 0x20, 0x76, 0x65, 0x63, 0x34, 0x28,
0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69,
0x6f, 0x6e, 0x2c, 0x20, 0x31, 0x2e, 0x30, 0x29, 0x29, 0x3b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x66, 0x72, 0x61, 0x67, 0x54, 0x65, 0x78, 0x43, 0x6f,
0x6f, 0x72, 0x64, 0x20, 0x3d, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78,
0x54, 0x65, 0x78, 0x43, 0x6f, 0x6f, 0x72, 0x64, 0x3b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x66, 0x72, 0x61, 0x67, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x20,
0x3d, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x43, 0x6f, 0x6c, 0x6f,
0x72, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x66, 0x72,
0x61, 0x67, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x6e,
0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x28, 0x76, 0x65, 0x63,
0x33, 0x28, 0x6d, 0x61, 0x74, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x20,
0x2a, 0x20, 0x76, 0x65, 0x63, 0x34, 0x28, 0x76, 0x65, 0x72, 0x74, 0x65,
0x78, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x2c, 0x20, 0x31, 0x2e, 0x30,
0x29, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x72, 0x61,
0x67, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x6e, 0x6f,
0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x28, 0x76, 0x65, 0x72, 0x74,
0x65, 0x78, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x29, 0x3b, 0x0a, 0x0a,
0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x63, 0x75,
0x6c, 0x61, 0x74, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x20, 0x76,
0x65, 0x72, 0x74, 0x65, 0x78, 0x20, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69,
0x6f, 0x6e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x5f, 0x50, 0x6f,
0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x3d, 0x20, 0x6d, 0x76, 0x70,
0x20, 0x2a, 0x20, 0x76, 0x65, 0x63, 0x34, 0x28, 0x76, 0x65, 0x72, 0x74,
0x65, 0x78, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20,
0x31, 0x2e, 0x30, 0x29, 0x3b, 0x0a, 0x7d
};
/* Number of bytes stored in base_vs. */
unsigned int base_vs_len = 799;
107 changes: 107 additions & 0 deletions pufferlib/ocean/codeball/codeball.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#include <stdio.h>
#include "codeball.h"
#include "puffernet.h"
#include "renderer.h"
#include <sys/time.h>

#if defined(PLATFORM_DESKTOP)
#define GLSL_VERSION 330
#else // PLATFORM_ANDROID, PLATFORM_WEB
#define GLSL_VERSION 100
#endif

#define NETWORK_CONTROLLED 0

#define SLOWDOWN 1

/*
 * Standalone driver for the CodeBall environment.
 *
 * Creates a render client and an 8-robot environment, then runs up to 10000
 * loop iterations (stopping early when the window is closed). Robots are
 * driven by a loaded network when NETWORK_CONTROLLED is non-zero, otherwise
 * by the inline scripted policy. After the first few iterations a
 * steps-per-second figure is printed, and rendering starts afterwards.
 *
 * Returns 0 on normal exit, 1 if the action buffer cannot be allocated.
 */
int main(void) {
    srand((unsigned int)time(NULL)); // Seed the random number generator
    Client* client = make_client();

    int n_robots = 8;
#if NETWORK_CONTROLLED
    // Only the network-driven build needs these sizes.
    int obs_size = (n_robots + 2) * 9;
    int action_size = 8;
    Weights* weights =
        load_weights("../../resources/codeball_weights.bin", 142601);
    LinearLSTM* net = make_linearlstm(weights, n_robots, obs_size, action_size);
    float observation_buffer[n_robots * obs_size];
    int action_buffer[n_robots * action_size];
#endif

    // Zero-initialize so that no field is read with an indeterminate value.
    // NOTE(review): every field not assigned below therefore starts at 0;
    // set any field that needs a non-zero default explicitly.
    CodeBall env = {0};
    env.n_robots = n_robots;
    env.n_nitros = 4;
    env.frame_skip = 1;
    allocate(&env);
    env.actions = (float*)calloc(n_robots * 4, sizeof(float));
    if (env.actions == NULL) {
        fprintf(stderr, "failed to allocate action buffer\n");
        free_allocated(&env);
        close_client(client);
        return 1;
    }
    reset(&env);

    struct timeval start, end;
    gettimeofday(&start, NULL);

    // Loop index at which the timing report is printed; rendering begins on
    // the iteration after it.
    int initial_steps = 2;
    // Number of step() calls actually made, so the timing report is exact.
    int steps_taken = 0;

    for (int i = 0; i < 10000; i++) {
        if (WindowShouldClose()) break;

        if (i % SLOWDOWN == 0) {
#if NETWORK_CONTROLLED
            if (env.terminal) {
                // Rebuild the network from the start of the weight blob.
                free_linearlstm(net);
                weights->idx = 0;
                net = make_linearlstm(weights, n_robots, obs_size, action_size);
            }
            make_observation(&env, observation_buffer);
            forward_linearlstm(net, observation_buffer, action_buffer);
            for (int j = 0; j < n_robots; j++) {
                int vel_action = action_buffer[j];
                if (vel_action == 4) {
                    vel_action = 8;
                }
                // Decode the discrete action as a 3x3 grid of x/z directions.
                int vel_x = vel_action % 3 - 1;
                int vel_z = vel_action / 3 - 1;
                env.actions[j * 4] = vel_x * ROBOT_MAX_GROUND_SPEED;
                env.actions[j * 4 + 1] = 0;
                env.actions[j * 4 + 2] = vel_z * ROBOT_MAX_GROUND_SPEED;
                env.actions[j * 4 + 3] = 0;
            }
#else
            for (int j = 0; j < env.n_robots; j++) {
                // Head for the ball by default.
                Vec3D tgt =
                    vec3d_subtract(env.ball.position, env.robots[j].position);
                for (int k = 0; k < env.n_robots; k++) {
                    if (k != j) {
                        Vec3D diff = vec3d_subtract(env.robots[k].position,
                                                    env.robots[j].position);
                        double diff_len = vec3d_length(diff);
                        if (diff_len < 2.5) {
                            // Another robot is close: move directly away
                            // from it instead (the last close robot wins).
                            tgt = vec3d_multiply(diff, -1.0);
                        }
                    }
                }
                tgt = vec3d_multiply(tgt, ROBOT_MAX_GROUND_SPEED);
                // NOTE(review): z goes to slot 1 here, whereas the
                // network-controlled branch writes z to slot 2 -- confirm
                // which action layout step() expects.
                env.actions[j * 4] = tgt.x;
                env.actions[j * 4 + 1] = tgt.z;
            }
#endif
            step(&env);
            steps_taken++;
        }
        if (i == initial_steps) {
            gettimeofday(&end, NULL);
            double elapsed = (end.tv_sec - start.tv_sec) +
                             (end.tv_usec - start.tv_usec) / 1000000.0;
            // Report the real number of steps taken: the loop has run
            // iterations 0..initial_steps by this point.
            printf("%d steps took %f seconds\n", steps_taken, elapsed);
            printf("SPS: \t%f\n", ((double)steps_taken) / elapsed);
        }
        if (i > initial_steps) {
            render(client, &env);
        }
    }

#if NETWORK_CONTROLLED
    // NOTE(review): `weights` is not released here; no release function for
    // it is visible in this file.
    free_linearlstm(net);
#endif
    close_client(client);
    free(env.actions);
    free_allocated(&env);

    return 0;
}
Loading