PufferAI · neverix · Dec 14, 2024 · Dec 14, 2024 · Dec 14, 2024 · Dec 14, 2024
diff --git a/.gitignore b/.gitignore
@@ -139,3 +139,13 @@ checkpoints/
 experiments/
 wandb/
 raylib/
+
+raylib_wasm/*
+c_gae.c
+pufferlib/puffernet.c
+pufferlib/extensions.c
+pufferlib/ocean/tactical/c_tactical.c
+pufferlib/ocean/grid/c_grid.c
+.idea/
+*.dSYM/
+/codeball
diff --git a/codeball_test.py b/codeball_test.py
@@ -0,0 +1,33 @@
+from pufferlib.ocean.torch import MLPPolicy, Recurrent
+from pufferlib.ocean.codeball.codeball import CodeBall
+from tqdm import trange
+import numpy as np
+import torch
+import os
+
+
+env = CodeBall(num_envs=1, n_robots=8, scripted_opponent_type="zero", frame_skip=5)
+obs, _ = env.reset()
+
+
+# pol = MLPPolicy(env)
+# rnn = Recurrent(env, pol)
+wp = None
+if wp is None:
+    from glob import glob
+    wp = max(glob("experiments/**/model_*.pt", recursive=True), key=lambda f: os.path.getmtime(f))
+rnn = torch.load(wp, map_location='cpu')
+rnn_state = None
+torch.set_grad_enabled(False)
+for _ in (bar := trange(10_000)):
+    obs = torch.from_numpy(obs).float()
+    actions, logprob, entropy, value, rnn_state = rnn(obs, rnn_state)
+    # logits, value, rnn_state = rnn.policy(obs, rnn_state)
+    # actions = logits
+    actions = actions.numpy()
+    obs, rewards, terminated, truncated, info = env.step(actions)
+    bar.set_postfix(val=(value.reshape(-1, 2).mean(0).tolist()))
+    if (terminated | truncated).any():
+        rnn_state = None
+    env.render()
+env.close()
diff --git a/config/ocean/codeball.ini b/config/ocean/codeball.ini
@@ -0,0 +1,70 @@
+; Configuration for the puffer_codeball environment: policy selection,
+; environment parameters, training hyperparameters and the sweep metric.
+; Lines starting with ';' are inactive alternative values.
+[base]
+package = ocean
+env_name = puffer_codeball
+policy_name = Policy
+; policy_name = MLPPolicy
+rnn_name = Recurrent
+
+; No active keys: every [rnn] setting below is commented out.
+[rnn]
+; input_size = 256
+; hidden_size = 256
+; num_layers = 3
+
+; Environment settings. Presumably forwarded to the CodeBall constructor as
+; keyword arguments - TODO confirm against the loader.
+[env]
+num_envs = 32
+n_robots = 8
+n_nitros = 4
+frame_skip = 3
+max_steps = 1500
+scripted_opponent_type = zero
+goal_scored_reward = 5.0
+loiter_penalty = 0.0
+ball_reward = 0.05
+
+; Training settings. NOTE(review): num_envs here (2) is a separate key from
+; [env] num_envs (32); verify which count each one controls.
+[train]
+total_timesteps = 200_000_000
+num_envs = 2
+num_workers = 2
+; env_batch_size = 1
+batch_size = 65536
+update_epochs = 1
+; minibatch_size = 32768
+bptt_horizon = 16
+anneal_lr = False
+; gae_lambda = 0.9776227170639571
+; gamma = 0.8567482546637853
+; clip_coef = 0.011102333784435113
+; vf_coef = 0.3403069830175013
+; vf_clip_coef = 0.26475190539131727
+max_grad_norm = 0.8660179376602173
+; ent_coef = 0.01376980586465873
+learning_rate = 0.002064722899262613
+; learning_rate = 0.0005978428084749276
+minibatch_size = 4096
+; bptt_horizon = 16
+; anneal_lr = False
+
+; Active discount settings are gamma = 0.98 and gae_lambda = 0.98; the
+; other gamma / gae_lambda lines in this section are disabled.
+; gamma = 0.5
+; gamma = 0.95
+; gamma = 0.95
+gamma = 0.98
+; gamma = 0.0
+; gae_lambda = 0.98
+gae_lambda = 0.98
+
+; gamma = 0.9257755108746066
+; gae_lambda = 0.8783667470139129
+
+; gamma = 0.98
+; gae_lambda = 0.97
+; ent_coef = 0.0012080029654114927
+ent_coef = 0.0005
+; max_grad_norm = 0.3808319568634033
+vf_coef = 0.6
+checkpoint_interval = 20
+; device = cuda
+device = cpu
+
+; Sweeps maximize the environment/reward metric.
+[sweep.metric]
+goal = maximize
+name = environment/reward
diff --git a/pufferlib/ocean/codeball/.gitignore b/pufferlib/ocean/codeball/.gitignore
@@ -0,0 +1,2 @@
+codeball
+setup.py
diff --git a/pufferlib/ocean/codeball/Makefile b/pufferlib/ocean/codeball/Makefile
@@ -0,0 +1,38 @@
+# Build rules for the standalone CodeBall demo binary and its Cython extension.
+CFILES=codeball.c
+HFILES=codeball.h renderer.h
+# Compile flags: include paths plus a debug/AddressSanitizer configuration.
+# For an optimized build, replace the -O1/-g/-fsanitize flags with:
+#   -O3 -march=native -funroll-loops
+CFLAGS=\
+	-I../../../raylib/include -I../../../pufferlib \
+	-O1 -g -fsanitize=address -fno-omit-frame-pointer
+# Libraries and macOS frameworks. They are kept out of CFLAGS so they can be
+# passed after the sources: linkers that resolve static archives left to
+# right need libraries to follow the objects that reference them.
+LDLIBS=\
+	../../../raylib/lib/libraylib.a -lm -lpthread \
+	-framework Cocoa -framework OpenGL -framework IOKit -framework CoreVideo -framework GLUT -framework AppKit -lglfw -lobjc
+SHADERS=base_vs.h fragment_fs.h
+
+ARCH=$(shell uname -m)
+PYTHON_VERSION=$(shell python -c 'import sys; print(str(sys.version_info.major) + str(sys.version_info.minor) + "-" + sys.platform)')
+PYTHON_SO_POSTFIX=cpython-${PYTHON_VERSION}.so
+
+# Default goal. Without this, `clean` would be the first target and a bare
+# `make` would delete the build outputs instead of building anything.
+all: codeball
+
+clean:
+	rm -f codeball *.so cy_codeball.c
+
+# Embed the shader sources as C byte arrays.
+base_vs.h: base.vs
+	xxd -i base.vs > base_vs.h
+
+fragment_fs.h: fragment.fs
+	xxd -i fragment.fs > fragment_fs.h
+
+# Standalone demo binary; depends on the Makefile so flag changes trigger a rebuild.
+codeball: $(CFILES) $(HFILES) Makefile $(SHADERS)
+	gcc $(CFLAGS) $(CFILES) -o codeball $(LDLIBS)
+
+codeball.pxd: cy_codeball.pyx codeball.h
+	autopxd codeball.h codeball.pxd
+
+# Cython extension module built in place via setup.py.
+cy_codeball.${PYTHON_SO_POSTFIX}: $(CFILES) $(HFILES) $(SHADERS) cy_codeball.pyx codeball.pxd renderer.pxd setup.py
+	python setup.py build_ext --inplace
+
+run_py: cy_codeball.${PYTHON_SO_POSTFIX}
+	python -m codeball
+
+run: codeball
+	./codeball
+
+.PHONY: all clean run run_py
diff --git a/pufferlib/ocean/codeball/base.vs b/pufferlib/ocean/codeball/base.vs
@@ -0,0 +1,32 @@
+#version 330
+
+// Vertex shader: forwards per-vertex attributes to the fragment stage and
+// projects each vertex with the combined `mvp` matrix.
+
+// Input vertex attributes
+in vec3 vertexPosition;
+in vec2 vertexTexCoord;
+in vec3 vertexNormal;
+in vec4 vertexColor;
+
+// Input uniform values
+uniform mat4 mvp;
+uniform mat4 matModel;
+// Declared but not read by main(): the only line that used it is commented out.
+uniform mat4 matNormal;
+
+// Output vertex attributes (to fragment shader)
+out vec3 fragPosition;
+out vec2 fragTexCoord;
+out vec4 fragColor;
+out vec3 fragNormal;
+
+// NOTE: Add here your custom variables
+
+void main() {
+    // Send vertex attributes to fragment shader
+    // fragPosition is the vertex position transformed by matModel.
+    fragPosition = vec3(matModel * vec4(vertexPosition, 1.0));
+    fragTexCoord = vertexTexCoord;
+    fragColor = vertexColor;
+    // The normal is only normalized and passed through as supplied; the
+    // matNormal transform below is disabled.
+    // fragNormal = normalize(vec3(matNormal * vec4(vertexNormal, 1.0)));
+    fragNormal = normalize(vertexNormal);
+
+    // Calculate final vertex position
+    gl_Position = mvp * vec4(vertexPosition, 1.0);
+}
diff --git a/pufferlib/ocean/codeball/base_vs.h b/pufferlib/ocean/codeball/base_vs.h
@@ -0,0 +1,70 @@
+/* Byte dump of the base.vs vertex shader source. The Makefile in this
+ * directory generates this file with `xxd -i base.vs > base_vs.h`, so manual
+ * edits are overwritten whenever base.vs changes and the rule is re-run. */
+unsigned char base_vs[] = {
+  0x23, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x20, 0x33, 0x33, 0x30,
+  0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x76,
+  0x65, 0x72, 0x74, 0x65, 0x78, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62,
+  0x75, 0x74, 0x65, 0x73, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x33,
+  0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x50, 0x6f, 0x73, 0x69, 0x74,
+  0x69, 0x6f, 0x6e, 0x3b, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x32,
+  0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x54, 0x65, 0x78, 0x43, 0x6f,
+  0x6f, 0x72, 0x64, 0x3b, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x33,
+  0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x4e, 0x6f, 0x72, 0x6d, 0x61,
+  0x6c, 0x3b, 0x0a, 0x69, 0x6e, 0x20, 0x76, 0x65, 0x63, 0x34, 0x20, 0x76,
+  0x65, 0x72, 0x74, 0x65, 0x78, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x3b, 0x0a,
+  0x0a, 0x2f, 0x2f, 0x20, 0x49, 0x6e, 0x70, 0x75, 0x74, 0x20, 0x75, 0x6e,
+  0x69, 0x66, 0x6f, 0x72, 0x6d, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73,
+  0x0a, 0x75, 0x6e, 0x69, 0x66, 0x6f, 0x72, 0x6d, 0x20, 0x6d, 0x61, 0x74,
+  0x34, 0x20, 0x6d, 0x76, 0x70, 0x3b, 0x0a, 0x75, 0x6e, 0x69, 0x66, 0x6f,
+  0x72, 0x6d, 0x20, 0x6d, 0x61, 0x74, 0x34, 0x20, 0x6d, 0x61, 0x74, 0x4d,
+  0x6f, 0x64, 0x65, 0x6c, 0x3b, 0x0a, 0x75, 0x6e, 0x69, 0x66, 0x6f, 0x72,
+  0x6d, 0x20, 0x6d, 0x61, 0x74, 0x34, 0x20, 0x6d, 0x61, 0x74, 0x4e, 0x6f,
+  0x72, 0x6d, 0x61, 0x6c, 0x3b, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x4f, 0x75,
+  0x74, 0x70, 0x75, 0x74, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x20,
+  0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x20, 0x28,
+  0x74, 0x6f, 0x20, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x20,
+  0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x29, 0x0a, 0x6f, 0x75, 0x74, 0x20,
+  0x76, 0x65, 0x63, 0x33, 0x20, 0x66, 0x72, 0x61, 0x67, 0x50, 0x6f, 0x73,
+  0x69, 0x74, 0x69, 0x6f, 0x6e, 0x3b, 0x0a, 0x6f, 0x75, 0x74, 0x20, 0x76,
+  0x65, 0x63, 0x32, 0x20, 0x66, 0x72, 0x61, 0x67, 0x54, 0x65, 0x78, 0x43,
+  0x6f, 0x6f, 0x72, 0x64, 0x3b, 0x0a, 0x6f, 0x75, 0x74, 0x20, 0x76, 0x65,
+  0x63, 0x34, 0x20, 0x66, 0x72, 0x61, 0x67, 0x43, 0x6f, 0x6c, 0x6f, 0x72,
+  0x3b, 0x0a, 0x6f, 0x75, 0x74, 0x20, 0x76, 0x65, 0x63, 0x33, 0x20, 0x66,
+  0x72, 0x61, 0x67, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x3b, 0x0a, 0x0a,
+  0x2f, 0x2f, 0x20, 0x4e, 0x4f, 0x54, 0x45, 0x3a, 0x20, 0x41, 0x64, 0x64,
+  0x20, 0x68, 0x65, 0x72, 0x65, 0x20, 0x79, 0x6f, 0x75, 0x72, 0x20, 0x63,
+  0x75, 0x73, 0x74, 0x6f, 0x6d, 0x20, 0x76, 0x61, 0x72, 0x69, 0x61, 0x62,
+  0x6c, 0x65, 0x73, 0x0a, 0x0a, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x6d, 0x61,
+  0x69, 0x6e, 0x28, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f,
+  0x2f, 0x20, 0x53, 0x65, 0x6e, 0x64, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65,
+  0x78, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73,
+  0x20, 0x74, 0x6f, 0x20, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74,
+  0x20, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x0a, 0x20, 0x20, 0x20, 0x20,
+  0x66, 0x72, 0x61, 0x67, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e,
+  0x20, 0x3d, 0x20, 0x76, 0x65, 0x63, 0x33, 0x28, 0x6d, 0x61, 0x74, 0x4d,
+  0x6f, 0x64, 0x65, 0x6c, 0x20, 0x2a, 0x20, 0x76, 0x65, 0x63, 0x34, 0x28,
+  0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69,
+  0x6f, 0x6e, 0x2c, 0x20, 0x31, 0x2e, 0x30, 0x29, 0x29, 0x3b, 0x0a, 0x20,
+  0x20, 0x20, 0x20, 0x66, 0x72, 0x61, 0x67, 0x54, 0x65, 0x78, 0x43, 0x6f,
+  0x6f, 0x72, 0x64, 0x20, 0x3d, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78,
+  0x54, 0x65, 0x78, 0x43, 0x6f, 0x6f, 0x72, 0x64, 0x3b, 0x0a, 0x20, 0x20,
+  0x20, 0x20, 0x66, 0x72, 0x61, 0x67, 0x43, 0x6f, 0x6c, 0x6f, 0x72, 0x20,
+  0x3d, 0x20, 0x76, 0x65, 0x72, 0x74, 0x65, 0x78, 0x43, 0x6f, 0x6c, 0x6f,
+  0x72, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x66, 0x72,
+  0x61, 0x67, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x6e,
+  0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x28, 0x76, 0x65, 0x63,
+  0x33, 0x28, 0x6d, 0x61, 0x74, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x20,
+  0x2a, 0x20, 0x76, 0x65, 0x63, 0x34, 0x28, 0x76, 0x65, 0x72, 0x74, 0x65,
+  0x78, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x2c, 0x20, 0x31, 0x2e, 0x30,
+  0x29, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x72, 0x61,
+  0x67, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x20, 0x3d, 0x20, 0x6e, 0x6f,
+  0x72, 0x6d, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x28, 0x76, 0x65, 0x72, 0x74,
+  0x65, 0x78, 0x4e, 0x6f, 0x72, 0x6d, 0x61, 0x6c, 0x29, 0x3b, 0x0a, 0x0a,
+  0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x63, 0x75,
+  0x6c, 0x61, 0x74, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x20, 0x76,
+  0x65, 0x72, 0x74, 0x65, 0x78, 0x20, 0x70, 0x6f, 0x73, 0x69, 0x74, 0x69,
+  0x6f, 0x6e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x6c, 0x5f, 0x50, 0x6f,
+  0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x3d, 0x20, 0x6d, 0x76, 0x70,
+  0x20, 0x2a, 0x20, 0x76, 0x65, 0x63, 0x34, 0x28, 0x76, 0x65, 0x72, 0x74,
+  0x65, 0x78, 0x50, 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20,
+  0x31, 0x2e, 0x30, 0x29, 0x3b, 0x0a, 0x7d
+};
+/* Number of bytes in base_vs. The array ends with '}' (0x7d) and has no
+ * terminating NUL, so consumers must use this length. */
+unsigned int base_vs_len = 799;
diff --git a/pufferlib/ocean/codeball/codeball.c b/pufferlib/ocean/codeball/codeball.c
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include "codeball.h"
+#include "puffernet.h"
+#include "renderer.h"
+#include <sys/time.h>
+
+#if defined(PLATFORM_DESKTOP)
+#define GLSL_VERSION 330
+#else  // PLATFORM_ANDROID, PLATFORM_WEB
+#define GLSL_VERSION 100
+#endif
+
+#define NETWORK_CONTROLLED 0
+
+#define SLOWDOWN 1
+
+/*
+ * Standalone demo entry point.
+ *
+ * Creates a render client and an 8-robot CodeBall environment, then runs up
+ * to 10000 loop iterations (or until the window is closed). The environment
+ * is stepped every SLOWDOWN-th iteration. Robots are driven either by a
+ * LinearLSTM network (NETWORK_CONTROLLED) or by a simple script: each robot
+ * heads for the ball unless another robot is closer than 2.5 units, in which
+ * case it heads directly away from that robot. A steps-per-second figure is
+ * printed once after the first few steps; rendering starts afterwards.
+ *
+ * Returns 0 on success, 1 if the action buffer cannot be allocated.
+ */
+int main() {
+    srand(time(NULL)); // Seed the random number generator
+    Client* client = make_client();
+
+    int n_robots = 8;
+    #if NETWORK_CONTROLLED
+    // Only the network path needs these sizes.
+    int obs_size = (n_robots + 2) * 9;
+    int action_size = 8;
+    Weights* weights =
+        load_weights("../../resources/codeball_weights.bin", 142601);
+    LinearLSTM* net = make_linearlstm(weights, n_robots, obs_size, action_size);
+    float observation_buffer[n_robots * obs_size];
+    int action_buffer[n_robots * action_size];
+    #endif
+
+    // Zero-initialise the struct so that any field not assigned below starts
+    // from a defined value instead of stack garbage.
+    CodeBall env = {0};
+    env.n_robots = n_robots;
+    env.n_nitros = 4;
+    env.frame_skip = 1;
+    allocate(&env);
+    // Four float action slots per robot.
+    env.actions = (float*)calloc(n_robots * 4, sizeof(float));
+    if (env.actions == NULL) {
+        fprintf(stderr, "Failed to allocate the action buffer\n");
+        #if NETWORK_CONTROLLED
+        free_linearlstm(net);
+        #endif
+        free_allocated(&env);
+        close_client(client);
+        return 1;
+    }
+    reset(&env);
+
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+
+    // Loop index at which the timing report is printed; rendering begins on
+    // the following iteration.
+    int initial_steps = 2;
+    // Number of step() calls made so far, so the report uses the real count
+    // (iterations 0..initial_steps inclusive, filtered by SLOWDOWN).
+    int steps_done = 0;
+
+    for (int i = 0; i < 10000; i++) {
+        if (WindowShouldClose()) break;
+
+        if (i % SLOWDOWN == 0) {
+            #if NETWORK_CONTROLLED
+            if (env.terminal) {
+                // Rebuild the network from the same weights on episode end.
+                free_linearlstm(net);
+                weights->idx = 0;
+                net = make_linearlstm(weights, n_robots, obs_size, action_size);
+            }
+            make_observation(&env, observation_buffer);
+            forward_linearlstm(net, observation_buffer, action_buffer);
+            for (int j = 0; j < n_robots; j++) {
+                int vel_action = action_buffer[j];
+                if (vel_action == 4) {
+                    vel_action = 8;
+                }
+                // Decode the discrete action into x/z directions in {-1, 0, 1}.
+                int vel_x = vel_action % 3 - 1;
+                int vel_z = vel_action / 3 - 1;
+                env.actions[j * 4] = vel_x * ROBOT_MAX_GROUND_SPEED;
+                env.actions[j * 4 + 1] = 0;
+                env.actions[j * 4 + 2] = vel_z * ROBOT_MAX_GROUND_SPEED;
+                env.actions[j * 4 + 3] = 0;
+            }
+            #else
+            for (int j = 0; j < env.n_robots; j++)
+            {
+                // Default target: the vector from this robot to the ball.
+                Vec3D tgt =
+                    vec3d_subtract(env.ball.position, env.robots[j].position);
+                for (int k = 0; k < env.n_robots; k++) {
+                    if (k != j) {
+                        Vec3D diff = vec3d_subtract(env.robots[k].position, env.robots[j].position);
+                        double diff_len = vec3d_length(diff);
+                        if (diff_len < 2.5) {
+                            // Too close to robot k: head away from it. The
+                            // last such robot in the loop wins.
+                            tgt = vec3d_multiply(diff, -1.0);
+                        }
+                    }
+                }
+                tgt = vec3d_multiply(tgt, ROBOT_MAX_GROUND_SPEED);
+                // NOTE(review): the network-controlled branch writes the z
+                // component to slot 2, while this branch writes it to slot 1
+                // and leaves slots 2 and 3 at their calloc'd zero. Confirm
+                // which layout step() expects.
+                env.actions[j * 4] = tgt.x;
+                env.actions[j * 4 + 1] = tgt.z;
+            }
+            #endif
+            step(&env);
+            steps_done++;
+        }
+        if (i == initial_steps) {
+            gettimeofday(&end, NULL);
+            double elapsed = (end.tv_sec - start.tv_sec) +
+                            (end.tv_usec - start.tv_usec) / 1000000.0;
+            printf("%d steps took %f seconds\n", steps_done, elapsed);
+            printf("SPS: \t%f\n", ((double)steps_done) / elapsed);
+        }
+        if (i > initial_steps) {
+            render(client, &env);
+        }
+    }
+
+    close_client(client);
+    #if NETWORK_CONTROLLED
+    free_linearlstm(net);
+    // NOTE(review): `weights` returned by load_weights() is not released
+    // here; free it with whatever puffernet.h provides for that.
+    #endif
+    free(env.actions);
+    free_allocated(&env);
+
+    return 0;
+}