diff --git a/pufferlib/ocean/artillery/artillery.c b/pufferlib/ocean/artillery/artillery.c
new file mode 100644
index 000000000..78c849705
--- /dev/null
+++ b/pufferlib/ocean/artillery/artillery.c
@@ -0,0 +1,83 @@
+#include "artillery.h"
+#include "puffernet.h"
+
+// Allocates the observation/action/reward/terminal buffers for standalone use.
+void allocate(Artillery* env) {
+    init(env);
+    int obs_size = 11;
+    env->observations = (float*)calloc(obs_size, sizeof(float));
+    env->actions = (int*)calloc(2, sizeof(int));
+    env->rewards = (float*)calloc(1, sizeof(float));
+    env->terminals = (unsigned char*)calloc(1, sizeof(unsigned char));
+}
+
+// Frees the buffers created by allocate().
+void free_allocated(Artillery* env) {
+    free(env->actions);
+    free(env->observations);
+    free(env->terminals);
+    free(env->rewards);
+    c_close(env);
+}
+
+// Interactive demo: the loaded policy plays unless LEFT SHIFT is held, in which
+// case the keyboard picks the actions for both guns.
+void demo() {
+    Artillery env = {
+        .adj = 0.014364991132735087,
+        .dist_fade = 0.11107816724558334,
+        .frameskip = 1,
+        .miss_penalty = -0.05521742600140713,
+        .max_reward_dist = 44.700659812685586,
+        .max_dist0 = 104.59870905085756,
+        .out_bounds_penalty = -0.1,
+        .turn_penalty = -0.1,
+        .turn_penalty_delay = 98,
+        .render = 1,
+    };
+    allocate(&env);
+
+    env.client = make_client(&env);
+
+    Weights* weights = load_weights("resources/artillery/puffer_artillery_weights.bin", 135051);
+    int logit_sizes[2] = {5, 5};
+    int obs_size = 11;
+    LinearLSTM* net = make_linearlstm(weights, 1, obs_size, logit_sizes, 2);
+
+    c_reset(&env);
+    SetTargetFPS(30);
+    while (!WindowShouldClose()) {
+        if (IsKeyDown(KEY_LEFT_SHIFT)) {
+            // Both guns default to AIMDOWN when no action key is pressed.
+            env.actions[0] = AIMDOWN;
+            env.actions[1] = AIMDOWN;
+
+            if (IsKeyDown(KEY_SPACE)) env.actions[0] = 0;
+            if (IsKeyDown(KEY_W)) env.actions[0] = 1;
+            if (IsKeyDown(KEY_S)) env.actions[0] = 2;
+            if (IsKeyDown(KEY_A)) env.actions[0] = 3;
+            if (IsKeyDown(KEY_D)) env.actions[0] = 4;
+
+            if (IsKeyDown(KEY_ENTER)) env.actions[1] = 0;
+            if (IsKeyDown(KEY_UP)) env.actions[1] = 1;
+            if (IsKeyDown(KEY_DOWN)) env.actions[1] = 2;
+            if (IsKeyDown(KEY_LEFT)) env.actions[1] = 3;
+            if (IsKeyDown(KEY_RIGHT)) env.actions[1] = 4;
+        } else {
+            int discrete_actions[2];
+            forward_linearlstm(net, env.observations, discrete_actions);
+            env.actions[0] = discrete_actions[0];
+            env.actions[1] = discrete_actions[1];
+        }
+        c_step(&env);
+        c_render(&env);
+    }
+    free_linearlstm(net);
+    free(weights);
+    free_allocated(&env);
+    close_client(env.client);
+}
+
+int main() {
+    demo();
+}
diff --git a/pufferlib/ocean/artillery/artillery.h b/pufferlib/ocean/artillery/artillery.h
new file mode 100644
index 000000000..a96ce4a72
--- /dev/null
+++ b/pufferlib/ocean/artillery/artillery.h
@@ -0,0 +1,480 @@
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "raylib.h"
+
+#define WIDTH 1280
+#define INVWIDTH (1.0f / WIDTH)
+#define HEIGHT 720
+#define INVHEIGHT (1.0f / HEIGHT)
+
+// Discrete action ids; each gun receives one of these per step.
+#define FIRE 0
+#define ADDPOWDER 1
+#define RMPOWDER 2
+#define AIMUP 3
+#define AIMDOWN 4
+
+#define MINX1 600
+#define MAXX1 1230
+#define MINY1 300
+#define MAXY1 670
+#define MINAIMANGLE 0.56f
+#define MAXAIMANGLE 1.56f
+
+#define MAX_PROJECTILE_TIME 60.0f
+#define VCOEFF 150.0f
+
+#define TIMESTEP 0.25f
+
+// Sums accumulated by add_log(); n counts the logged episodes.
+typedef struct Log {
+    float perf;
+    float episode_return;
+    float score;
+    float scoreL;
+    float scoreR;
+    float distL;
+    float distR;
+    float episode_length;
+    float max_reward_distn;
+    float turn_penaltyn;
+    float acc1000;
+    float n;
+} Log;
+
+typedef struct Client {
+    int render;
+} Client;
+
+// One gun together with its target (tx, ty) and projectile (px, py).
+typedef struct Gun {
+    float powder;
+    float angle;
+    float px;
+    float py;
+    int projectile_active;
+    float projectile_time;
+    float v0;
+    float vx0;
+    float vy0;
+    float x0;
+    float y0;
+    float tx;
+    float ty;
+    float dist;
+    float score;
+    int fired;
+    float hit;
+    float turn_penaltyn;
+} Gun;
+
+typedef struct Artillery {
+    Client* client;
+    Log log;
+    float* observations;
+    int* actions;
+    float* rewards;
+    unsigned char* terminals;
+
+    float score;
+    float scoreL;
+    float scoreR;
+    int tick;
+
+    Gun gun[2];
+
+    float g;
+
+    float max_reward_dist;
+    float max_reward_distn;
+    float max_dist0;
+    float adj;
+    float dist_fade;
+    float turn_penalty;
+    float turn_penalty_delay;
+    float miss_penalty;
+    float out_bounds_penalty;
+    float vx;
+
+    int frameskip;
+    int render;
+
+    int runs;
+} Artillery;
+
+// No-op: there is nothing to release here.
+void c_close(Artillery* env) {
+}
+
+// Adds the finished episode's statistics to the running log sums.
+void add_log(Artillery* env) {
+    env->log.perf += 0.5f * (env->gun[0].hit + env->gun[1].hit);
+    env->log.episode_length += env->tick;
+    env->log.episode_return += env->score;
+    env->log.score += env->score;
+    env->log.scoreL += env->gun[0].score;
+    env->log.scoreR += env->gun[1].score;
+    env->log.distL += env->gun[0].dist;
+    env->log.distR += env->gun[1].dist;
+    env->log.max_reward_distn += env->max_reward_distn;
+    env->log.n += 1;
+    // NOTE(review): only gun[1].score weights this metric -- confirm intent.
+    env->log.acc1000 += env->gun[1].score * (1500.0f - (env->gun[0].dist + env->gun[1].dist));
+}
+
+// Writes the 11-float observation: per-gun powder, angle offset and normalized
+// target position, followed by score, scaled tick and scaled target speed.
+void compute_observations(Artillery* env) {
+    env->observations[0] = env->gun[0].powder;
+    env->observations[1] = env->gun[0].angle - MINAIMANGLE;
+    env->observations[2] = env->gun[0].tx * INVWIDTH;
+    env->observations[3] = env->gun[0].ty * INVHEIGHT;
+
+    env->observations[4] = env->gun[1].powder;
+    env->observations[5] = env->gun[1].angle - MINAIMANGLE;
+    env->observations[6] = env->gun[1].tx * INVWIDTH;
+    env->observations[7] = env->gun[1].ty * INVHEIGHT;
+
+    env->observations[8] = env->score;
+    env->observations[9] = env->tick * 0.01;
+    env->observations[10] = env->vx * 0.2f;
+}
+
+// Randomizes both targets and the target speed; sets each gun's start state.
+void get_random_start(Artillery* env) {
+    env->gun[0].tx = (rand() % (WIDTH/2 - 100)) + WIDTH/2 + 50;
+    env->gun[0].ty = rand() % HEIGHT;
+    if (env->gun[0].ty < MINY1) env->gun[0].ty = MINY1;
+    if (env->gun[0].ty > MAXY1) env->gun[0].ty = MAXY1;
+    env->gun[0].angle = 0.7f;
+    env->gun[0].powder = 0.95f;
+    env->gun[0].x0 = 30.0f;
+    env->gun[0].y0 = 30.0f;
+
+    env->gun[1].tx = (rand() % (WIDTH/2 - 100)) + 50;
+    env->gun[1].ty = rand() % HEIGHT;
+    if (env->gun[1].ty < MINY1) env->gun[1].ty = MINY1;
+    if (env->gun[1].ty > MAXY1) env->gun[1].ty = MAXY1;
+    env->gun[1].angle = 1.25f;
+    env->gun[1].powder = 0.75f;
+    env->gun[1].x0 = WIDTH - 30.0f;
+    env->gun[1].y0 = 30.0f;
+
+    env->vx = 5 - rand() % 3;
+}
+
+// Starts a new episode. The terminal/reward buffers are left untouched so the
+// auto-reset in step_frame() cannot erase that step's terminal flag and reward.
+void reset_round(Artillery* env) {
+    get_random_start(env);
+    env->tick = 0;
+    env->runs += 1;
+    env->score = 0;
+
+    for (int i = 0; i < 2; i++) {
+        env->gun[i].fired = 0;
+        env->gun[i].projectile_active = 0;
+        env->gun[i].projectile_time = 0.0f;
+        env->gun[i].score = 0;
+        env->gun[i].turn_penaltyn = 0;
+        env->gun[i].hit = 0.0f;
+    }
+
+    // The rewarded radius shrinks as runs accumulate, down to max_reward_dist.
+    env->max_reward_distn = env->max_dist0 - (int)(env->runs * env->dist_fade);
+    if (env->max_reward_distn < env->max_reward_dist) env->max_reward_distn = env->max_reward_dist;
+}
+
+// Clears the step outputs, starts a new episode, then publishes its observations.
+void c_reset(Artillery* env) {
+    env->terminals[0] = 0;
+    env->rewards[0] = 0;
+    reset_round(env);
+    compute_observations(env);
+}
+
+// One-time setup: clears counters, sets gravity and picks an initial layout.
+void init(Artillery* env) {
+    env->runs = 0;
+    env->tick = 0;
+    env->g = 9.81f;
+
+    for (int i = 0; i < 2; i++) {
+        env->gun[i].projectile_active = 0;
+        env->gun[i].projectile_time = 0.0f;
+        env->gun[i].dist = WIDTH;
+        env->gun[i].fired = 0;
+    }
+
+    get_random_start(env);
+}
+
+// Samples the gun's ballistic arc every TIMESTEP against its moving target and
+// stores the closest approach in gun->dist (setting gun->hit when under 15).
+// Returns a multiplier derived from the arc's height at the midpoint between
+// the gun and the target's starting x, or 0 if the arc never gets that far.
+float calculate_parabola_closest_distance(Artillery* env, int gun_idx) {
+    Gun* gun = &env->gun[gun_idx];
+    gun->v0 = gun->powder * VCOEFF;
+
+    float agent_multiplier = (gun_idx == 0) ? 1.0f : -1.0f;
+    gun->vx0 = gun->v0 * cosf(gun->angle) * agent_multiplier;
+    gun->vy0 = gun->v0 * sinf(gun->angle);
+
+    float tx = gun->tx;
+    float ty = gun->ty;
+
+    float min_dist2 = 99999999.0f;
+
+    float mid_x = (gun->x0 + tx) * 0.5f;
+    float mid_y = 0.0f;
+    int found_mid = 0;
+
+    for (float t = 0; t < MAX_PROJECTILE_TIME; t += TIMESTEP) {
+        float x = gun->x0 + gun->vx0 * t;
+        float y = gun->y0 + gun->vy0 * t - 0.5f * env->g * t * t;
+
+        tx = tx - agent_multiplier * env->vx;
+
+        if (y < 0 || x < 0 || x > WIDTH) break;
+
+        if (!found_mid && ((gun_idx == 0 && x >= mid_x) || (gun_idx == 1 && x <= mid_x))) {
+            mid_y = y;
+            found_mid = 1;
+        }
+
+        float dx = x - tx;
+        float dy = y - ty;
+        float dist2 = dx * dx + dy * dy;
+
+        if (dist2 < min_dist2) {
+            min_dist2 = dist2;
+        }
+    }
+    gun->dist = sqrtf(min_dist2);
+    if (gun->dist < 15.0f) gun->hit = 1.0f;
+
+    float traj_score = 0.0f;
+    if (found_mid) {
+        if (gun_idx == 0) {
+            traj_score = (mid_y < ty) ? 1.0f : 0.5f;
+        } else {
+            traj_score = (mid_y > ty * 2.0) ? 1.0f : 0.25f;
+        }
+    }
+
+    return traj_score;
+}
+
+// Scores a shot from gun_idx, adds it to the score/reward totals and, when
+// rendering, starts the projectile animation.
+void fire_projectile(Artillery* env, int gun_idx) {
+    Gun* gun = &env->gun[gun_idx];
+    float traj_score = calculate_parabola_closest_distance(env, gun_idx);
+    float score;
+
+    if (gun->dist >= env->max_reward_distn) {
+        score = env->miss_penalty;
+    } else {
+        score = 1.0f - (gun->dist / env->max_reward_distn);
+    }
+
+    if (score > 0.0f) {
+        score = score * traj_score;
+    }
+
+    gun->score += score;
+    env->score += score * 0.5f;
+    if (gun_idx == 0) env->scoreL += score * 0.5f;
+    if (gun_idx == 1) env->scoreR += score * 0.5f;
+    env->rewards[0] += score * 0.5f;
+
+    if (env->render) {
+        gun->projectile_active = 1;
+        gun->projectile_time = 0.0f;
+        gun->px = gun->x0;
+        gun->py = gun->y0;
+    }
+}
+
+// Advances the simulation by one frame for both guns and auto-resets when the
+// episode ends; the terminal flag and reward of the final step are preserved.
+void step_frame(Artillery* env, float action0, float action1) {
+    int actions[2] = {(int)action0, (int)action1};
+    float adj = env->adj;
+
+    for (int gun_idx = 0; gun_idx < 2; gun_idx++) {
+        Gun* gun = &env->gun[gun_idx];
+        int action = actions[gun_idx];
+
+        if (!gun->projectile_active) {
+            if (action == FIRE) {
+                gun->fired = 1;
+                fire_projectile(env, gun_idx);
+            } else if (action == ADDPOWDER) {
+                if (gun->powder < 1.0f - adj) {
+                    gun->powder += adj;
+                } else {
+                    // NOTE(review): added to env->score only, never to rewards[0].
+                    env->score += env->out_bounds_penalty;
+                }
+            } else if (action == RMPOWDER) {
+                if (gun->powder > adj) {
+                    gun->powder -= adj;
+                } else {
+                    env->score += env->out_bounds_penalty;
+                }
+            } else if (action == AIMUP) {
+                if (gun->angle < MAXAIMANGLE - adj) {
+                    gun->angle += adj;
+                } else {
+                    env->score += env->out_bounds_penalty;
+                }
+            } else if (action == AIMDOWN) {
+                if (gun->angle > MINAIMANGLE + adj) {
+                    gun->angle -= adj;
+                } else {
+                    env->score += env->out_bounds_penalty;
+                }
+            }
+
+            if (action != FIRE) {
+                float turn_pen = (env->tick > env->turn_penalty_delay) ? env->turn_penalty : 0.0f;
+                gun->score += turn_pen;
+                env->score += turn_pen * 0.5f;
+                if (gun_idx == 0) env->scoreL += turn_pen * 0.5f;
+                if (gun_idx == 1) env->scoreR += turn_pen * 0.5f;
+                env->rewards[0] += turn_pen * 0.5f;
+            }
+        } else { // Projectile Active
+            gun->projectile_time += TIMESTEP;
+            gun->px = gun->x0 + gun->vx0 * gun->projectile_time;
+            gun->py = gun->y0 + gun->vy0 * gun->projectile_time - 0.5f * env->g * gun->projectile_time * gun->projectile_time;
+        }
+        gun->tx = gun->tx + (2 * gun_idx - 1) * env->vx; // target velocity vectors must be opposite
+    }
+
+    int both_fired = env->gun[0].fired && env->gun[1].fired;
+    int projectiles_done = 1;
+
+    if (env->render) {
+        for (int i = 0; i < 2; i++) {
+            Gun* gun = &env->gun[i];
+            if (gun->projectile_active && gun->px > 0 && gun->px < WIDTH && gun->py > 0) {
+                projectiles_done = 0;
+            }
+        }
+    }
+
+    if ((both_fired && (projectiles_done || !env->render)) || (env->score < -2.0f)) {
+        env->terminals[0] = 1;
+        add_log(env);
+        // reset_round() keeps terminals/rewards; c_step() refreshes observations.
+        reset_round(env);
+    }
+}
+
+// Runs frameskip frames with the current actions, then refreshes observations.
+void c_step(Artillery* env) {
+    env->terminals[0] = 0;
+    env->rewards[0] = 0.0;
+
+    float action0 = env->actions[0];
+    float action1 = env->actions[1];
+
+    for (int i = 0; i < env->frameskip; i++) {
+        env->tick += 1;
+        step_frame(env, action0, action1);
+    }
+    compute_observations(env);
+}
+
+// Opens the raylib window and returns a heap-allocated client handle.
+Client* make_client(Artillery* env) {
+    Client* client = (Client*)calloc(1, sizeof(Client));
+
+    // Config flags only take effect when set before InitWindow().
+    SetConfigFlags(FLAG_MSAA_4X_HINT);
+    InitWindow(WIDTH, HEIGHT, "PufferLib Artillery Dual");
+    SetTargetFPS(30);
+
+    return client;
+}
+
+void close_client(Client* client) {
+    CloseWindow();
+    free(client);
+}
+
+// Draws targets, guns, predicted arcs, live projectiles and the score overlay.
+void c_render(Artillery* env) {
+    env->render = 1;
+    if (env->client == NULL) {
+        env->client = make_client(env);
+    }
+
+    if (IsKeyDown(KEY_ESCAPE)) {
+        exit(0);
+    }
+    if (IsKeyPressed(KEY_TAB)) {
+        ToggleFullscreen();
+    }
+
+    BeginDrawing();
+    ClearBackground((Color){135, 206, 235, 255});
+
+    DrawCircle(env->gun[0].tx, HEIGHT - env->gun[0].ty, 15, RED);
+    DrawCircle(env->gun[1].tx, HEIGHT - env->gun[1].ty, 15, BLUE);
+
+    for (int gun_idx = 0; gun_idx < 2; gun_idx++) {
+        Gun* gun = &env->gun[gun_idx];
+
+        float barrel_length = 40.0f;
+        float barrel_width = 8.0f;
+        float barrel_x = gun->x0;
+        float barrel_y = HEIGHT - gun->y0;
+
+        float angle_multiplier = (gun_idx == 0) ? 1.0f : -1.0f;
+
+        Vector2 barrel_start = {barrel_x, barrel_y};
+        Vector2 barrel_end = {
+            barrel_x + barrel_length * cosf(gun->angle) * angle_multiplier,
+            barrel_y - barrel_length * sinf(gun->angle)
+        };
+
+        // Launch velocity is constant along the arc, so compute it once.
+        float v0 = gun->powder * VCOEFF;
+        float vx0 = v0 * cosf(gun->angle) * angle_multiplier;
+        float vy0 = v0 * sinf(gun->angle);
+
+        Vector2 prev_point = {gun->x0, HEIGHT - gun->y0};
+        int j = 0;
+        for (float t = TIMESTEP; t < MAX_PROJECTILE_TIME; t += 0.5f) {
+            float x = gun->x0 + vx0 * t;
+            float y = gun->y0 + vy0 * t - 0.5f * env->g * t * t;
+
+            if (y < 0 || x < 0 || x > WIDTH) break;
+
+            Vector2 current_point = {x, HEIGHT - y};
+            // Draw every other segment to get a dashed preview line.
+            if (j % 2 == 0) DrawLineV(prev_point, current_point, BLACK);
+            prev_point = current_point;
+            j += 1;
+        }
+
+        Color gun_color = (gun_idx == 0) ? DARKGRAY : DARKBLUE;
+        DrawLineEx(barrel_start, barrel_end, barrel_width, gun_color);
+        DrawCircle(barrel_x, barrel_y, 12.0f, gun_color);
+
+        if (gun->projectile_active) {
+            Color proj_color = (gun_idx == 0) ? RED : BLUE;
+            DrawCircle(gun->px, HEIGHT - gun->py, 4.0f, proj_color);
+        }
+    }
+
+    DrawText(TextFormat("Score: %.3f", env->score), 10, 10, 20, BLACK);
+    DrawText(TextFormat("Gun0: %.3f Gun1: %.3f", env->gun[0].score, env->gun[1].score), 10, 35, 20, BLACK);
+
+    EndDrawing();
+}
diff --git a/pufferlib/ocean/artillery/artillery.py b/pufferlib/ocean/artillery/artillery.py
new file mode 100644
index 000000000..209bbd6a3
--- /dev/null
+++ b/pufferlib/ocean/artillery/artillery.py
@@ -0,0 +1,84 @@
+import numpy as np
+import gymnasium
+
+import pufferlib
+from pufferlib.ocean.artillery import binding
+
+class Artillery(pufferlib.PufferEnv):
+    '''Vectorized wrapper around the C artillery environment.
+
+    Each env exposes an 11-float observation and a MultiDiscrete([5, 5])
+    action: one of five actions for each of the two guns.
+    '''
+    def __init__(self, num_envs=1, render_mode=None,
+            frameskip=1,
+            max_reward_dist=45,
+            adj=0.0144, dist_fade=0.11, turn_penalty_delay=98, max_dist0=105,
+            turn_penalty=-0.1, miss_penalty=-0.055, render=1, out_bounds_penalty=-0.1,
+            log_interval=128,
+            seed=7,
+            buf=None):
+        obs_size = 11
+        # NOTE(review): the C side can emit values outside [0, 1] (e.g. a
+        # negative score), so these bounds look nominal -- confirm.
+        self.single_observation_space = gymnasium.spaces.Box(low=0, high=1, shape=(obs_size,), dtype=np.float32)
+        self.render_mode = render_mode
+        self.num_agents = num_envs
+        self.log_interval = log_interval
+        self.tick = 0
+
+        self.single_action_space = gymnasium.spaces.MultiDiscrete([5, 5])
+
+        super().__init__(buf)
+
+        self.c_envs = binding.vec_init(
+            self.observations, self.actions, self.rewards, self.terminals, self.truncations, num_envs,
+            seed, num_envs=num_envs, seed=seed, frameskip=frameskip,
+            max_reward_dist=max_reward_dist,
+            adj=adj, dist_fade=dist_fade, turn_penalty_delay=turn_penalty_delay, max_dist0=max_dist0,
+            turn_penalty=turn_penalty, miss_penalty=miss_penalty, render=render,
+            out_bounds_penalty=out_bounds_penalty
+        )
+
+    def reset(self, seed=0):
+        binding.vec_reset(self.c_envs, seed)
+        self.tick = 0
+        return self.observations, []
+
+    def step(self, actions):
+        self.actions[:] = actions
+
+        self.tick += 1
+        binding.vec_step(self.c_envs)
+
+        info = []
+        # Logs are only fetched every log_interval steps.
+        if self.tick % self.log_interval == 0:
+            info.append(binding.vec_log(self.c_envs))
+
+        return (self.observations, self.rewards,
+            self.terminals, self.truncations, info)
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+def test_performance(timeout=10, atn_cache=1024):
+    '''Steps one env with cached random actions for `timeout` seconds.'''
+    env = Artillery(num_envs=1)
+    env.reset()
+    tick = 0
+
+    actions = np.random.randint(0, 5, (atn_cache, env.num_agents, 2))
+
+    import time
+    start = time.time()
+    while time.time() - start < timeout:
+        atn = actions[tick % atn_cache]
+        env.step(atn)
+        tick += 1
+
+if __name__ == '__main__':
+    test_performance()
diff --git a/pufferlib/ocean/artillery/binding.c b/pufferlib/ocean/artillery/binding.c
new file mode 100644
index 000000000..23e021513
--- /dev/null
+++ b/pufferlib/ocean/artillery/binding.c
@@ -0,0 +1,36 @@
+#include "artillery.h"
+
+#define Env Artillery
+#include "../env_binding.h"
+
+// Copies the keyword arguments from Python into the env, then initializes it.
+static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
+    env->frameskip = unpack(kwargs, "frameskip");
+    env->render = unpack(kwargs, "render");
+    env->adj = unpack(kwargs, "adj");
+    env->max_reward_dist = unpack(kwargs, "max_reward_dist");
+    env->dist_fade = unpack(kwargs, "dist_fade");
+    env->turn_penalty = unpack(kwargs, "turn_penalty");
+    env->turn_penalty_delay = unpack(kwargs, "turn_penalty_delay");
+    env->miss_penalty = unpack(kwargs, "miss_penalty");
+    env->max_dist0 = unpack(kwargs, "max_dist0");
+    env->out_bounds_penalty = unpack(kwargs, "out_bounds_penalty");
+
+    init(env);
+    return 0;
+}
+
+// Exports the Log fields that are reported to Python.
+static int my_log(PyObject* dict, Log* log) {
+    assign_to_dict(dict, "episode_return", log->episode_return);
+    assign_to_dict(dict, "score", log->score);
+    assign_to_dict(dict, "scoreL", log->scoreL);
+    assign_to_dict(dict, "scoreR", log->scoreR);
+    assign_to_dict(dict, "distL", log->distL);
+    assign_to_dict(dict, "distR", log->distR);
+    assign_to_dict(dict, "episode_length", log->episode_length);
+    assign_to_dict(dict, "max_reward_distn", log->max_reward_distn);
+    assign_to_dict(dict, "acc1000", log->acc1000);
+    assign_to_dict(dict, "perf", log->perf);
+    return 0;
+}
diff --git a/pufferlib/ocean/artillery/runs.md b/pufferlib/ocean/artillery/runs.md
new file mode 100644
index 000000000..caea24eaf
--- /dev/null
+++ b/pufferlib/ocean/artillery/runs.md
@@ -0,0 +1,2 @@
+- artym1 whole-disco-483 39b3er39
+    - acc100 956
\ No newline at end of file
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
index 93df76506..fdf643916 100644
--- a/pufferlib/ocean/environment.py
+++ b/pufferlib/ocean/environment.py
@@ -117,6 +117,7 @@ def make_multiagent(buf=None, **kwargs):
     return pufferlib.emulation.PettingZooPufferEnv(env=env, buf=buf)
 
 MAKE_FUNCTIONS = {
+    'artillery': 'Artillery',
     'battle': 'Battle',
     'breakout': 'Breakout',
     'blastar': 'Blastar',
diff --git a/pufferlib/resources/artillery/puffer_artillery_weights.bin b/pufferlib/resources/artillery/puffer_artillery_weights.bin
new file mode 100644
index 000000000..1ccbf4ed1
Binary files /dev/null and b/pufferlib/resources/artillery/puffer_artillery_weights.bin differ
diff --git a/scripts/train_ocean.sh b/scripts/train_ocean.sh
index 797f079bc..a097fce55 100755
--- a/scripts/train_ocean.sh
+++ b/scripts/train_ocean.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 environments=(
+    "puffer_artillery"
     "puffer_breakout"
     "puffer_connect4"
     "puffer_pong"