From ab0f85792cfb99322f98be7060a5992a4ff71336 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 15 Sep 2025 23:04:58 +0000 Subject: [PATCH 001/188] Should be a stable standalone for multisweeps to test --- pufferlib/config/ocean/pong.ini | 4 ++ pufferlib/pufferl.py | 118 ++++++++++++++++++++++++++++++-- pufferlib/sweep.py | 18 ++++- 3 files changed, 131 insertions(+), 9 deletions(-) diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index a0bf24d93..7ff0960b1 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -31,6 +31,10 @@ vf_coef = 1.9960893747329385 vtrace_c_clip = 1.0873122745787867 vtrace_rho_clip = 2.784150207139061 +[sweep] +downsample = 0 +max_cost = 30 + [sweep.train.total_timesteps] distribution = log_normal min = 1e7 diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 5c2953a5c..9799f724c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -198,7 +198,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Dashboard self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) - self.print_dashboard(clear=True) + #self.print_dashboard(clear=True) @property def uptime(self): @@ -442,7 +442,7 @@ def train(self): if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() self.losses = losses - self.print_dashboard() + #self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step @@ -766,9 +766,9 @@ def run(self): time.sleep(self.delay) continue - self.gpu_util.append(torch.cuda.utilization()) - free, total = torch.cuda.mem_get_info() - self.gpu_mem.append(100*(total-free)/total) + #self.gpu_util.append(torch.cuda.utilization()) + #free, total = torch.cuda.mem_get_info() + #self.gpu_mem.append(100*(total-free)/total) else: self.gpu_util.append(0) self.gpu_mem.append(0) @@ -909,6 +909,9 @@ def train(env_name, args=None, 
vecenv=None, policy=None, logger=None): all_logs = [] while pufferl.global_step < train_config['total_timesteps']: + if pufferl.uptime > args['sweep']['max_cost']: + break + if train_config['device'] == 'cuda': torch.compiler.cudagraph_mark_step_begin() pufferl.evaluate() @@ -925,15 +928,19 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): # rollouts within a fixed number of epochs) i = 0 stats = {} - while i < 32 or not stats: + uptime = pufferl.uptime + agent_steps = pufferl.global_step + while i < 128 or not stats: stats = pufferl.evaluate() i += 1 logs = pufferl.mean_and_log() + logs['uptime'] = uptime + logs['agent_steps'] = agent_steps if logs is not None: all_logs.append(logs) - pufferl.print_dashboard() + #pufferl.print_dashboard() model_path = pufferl.close() pufferl.logger.close(model_path) return all_logs @@ -994,6 +1001,100 @@ def eval(env_name, args=None, vecenv=None, policy=None): imageio.mimsave(args['gif_path'], frames, fps=args['fps'], loop=0) frames.append('Done') +def _sweep_worker(env_name, q_host, q_worker, device): + while True: + #print("Worker waiting") + args = q_worker.get() + #print("Worker got data") + args['train']['device'] = device + seed = time.time_ns() & 0xFFFFFFFF + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + try: + all_logs = train(env_name, args=args) + except Exception: + import traceback + traceback.print_exc() + + #all_logs = [{'foo': 0}] + #print("Worker ran experiment") + q_host.put(all_logs) + #print("Worker submitted result") + +def multisweep(args=None, env_name=None): + args = args or load_config(env_name) + sweep_gpus = args['sweep_gpus'] + if sweep_gpus == -1: + sweep_gpus = torch.cuda.device_count() + + method = args['sweep'].pop('method') + try: + sweep_cls = getattr(pufferlib.sweep, method) + except: + raise pufferlib.APIUsageError(f'Invalid sweep method {method}. 
See pufferlib.sweep') + + sweep = sweep_cls(args['sweep']) + points_per_run = args['sweep']['downsample'] + target_key = f'environment/{args["sweep"]["metric"]}' + + from multiprocessing import Process, Queue, set_start_method + from copy import deepcopy + + host_queues = [] + worker_queues = [] + workers = [] + worker_args = [] + set_start_method('spawn') + for i in range(sweep_gpus): + q_host = Queue() + q_worker = Queue() + w = Process( + target=_sweep_worker, + args=(env_name, q_host, q_worker, f'cuda:{i}') + ) + w.start() + host_queues.append(q_host) + worker_queues.append(q_worker) + args = deepcopy(args) + worker_args.append(args) + + for w in range(sweep_gpus): + args = worker_args[w] + sweep.suggest(args) + total_timesteps = args['train']['total_timesteps'] + worker_queues[w].put(args) + + runs = 0 + + suggestion = deepcopy(args) + while runs < args['max_runs']: + for w in range(sweep_gpus): + args = worker_args[w] + if host_queues[w].empty(): + continue + + all_logs = host_queues[w].get(timeout=0) + if not all_logs: + continue + + all_logs = [e for e in all_logs if target_key in e] + scores = downsample([log[target_key] for log in all_logs], points_per_run) + times = downsample([log['uptime'] for log in all_logs], points_per_run) + steps = downsample([log['agent_steps'] for log in all_logs], points_per_run) + #costs = np.stack([times, steps], axis=1) + costs = times + timesteps = [log['agent_steps'] for log in all_logs] + timesteps = downsample(timesteps, points_per_run) + for score, cost, timestep in zip(scores, costs, timesteps): + args['train']['total_timesteps'] = timestep + sweep.observe(args, score, cost) + + runs += 1 + + sweep.suggest(args) + worker_queues[w].put(args) + def sweep(args=None, env_name=None): args = args or load_config(env_name) if not args['wandb'] and not args['neptune']: @@ -1141,6 +1242,7 @@ def load_config(env_name): parser.add_argument('--neptune-name', type=str, default='pufferai') parser.add_argument('--neptune-project', 
type=str, default='ablations') parser.add_argument('--local-rank', type=int, default=0, help='Used by torchrun for DDP') + parser.add_argument('--sweep-gpus', type=int, default=-1, help='multigpu sweeps') parser.add_argument('--tag', type=str, default=None, help='Tag for experiment') args = parser.parse_known_args()[0] @@ -1205,6 +1307,8 @@ def main(): eval(env_name=env_name) elif mode == 'sweep': sweep(env_name=env_name) + elif mode == 'multisweep': + multisweep(env_name=env_name) elif mode == 'autotune': autotune(env_name=env_name) elif mode == 'profile': diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 3af13015b..28e7f1c8e 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -114,7 +114,7 @@ def unnormalize(self, value): def _params_from_puffer_sweep(sweep_config): param_spaces = {} for name, param in sweep_config.items(): - if name in ('method', 'metric', 'goal', 'downsample'): + if name in ('method', 'metric', 'goal', 'downsample', 'max_cost'): continue assert isinstance(param, dict) @@ -337,6 +337,8 @@ def __init__(self, suggestions_per_pareto = 256, seed_with_search_center = True, expansion_rate = 0.25, + buffer_size = 10, + max_cost = 30, ): self.hyperparameters = Hyperparameters(sweep_config) self.num_random_samples = num_random_samples @@ -347,6 +349,8 @@ def __init__(self, self.resample_frequency = resample_frequency self.max_suggestion_cost = max_suggestion_cost self.expansion_rate = expansion_rate + self.buffer_size = buffer_size + self.buffer = [] self.success_observations = [] self.failure_observations = [] @@ -357,6 +361,11 @@ def __init__(self, def suggest(self, fill): # TODO: Clip random samples to bounds so we don't get bad high cost samples + if len(self.buffer) > 0: + suggestion = self.buffer.pop() + print('Suggested') + return self.hyperparameters.to_dict(suggestion, fill), {} + info = {} self.suggestion_idx += 1 if len(self.success_observations) == 0 and self.seed_with_search_center: @@ -446,8 +455,13 @@ def suggest(self, 
fill): suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * ( gp_y_norm*weight) + mask = gp_c > 30 + suggestion_scores[mask.squeeze()] = -1e8 + + idxs = np.argsort(suggestion_scores)[::-1][:self.buffer_size] + best_idx = idxs[0] + self.buffer = [suggestions[i].numpy() for i in idxs[1:]] - best_idx = np.argmax(suggestion_scores) info = dict( cost = gp_c[best_idx].item(), score = gp_y[best_idx].item(), From 9f352eb16fd46b341c0354ec6cad6f0934e5f3df Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 16 Sep 2025 00:00:22 +0000 Subject: [PATCH 002/188] Stable multigpu hyper sweeps --- plottest.py | 281 ++++++++++++++++++++++++++++++++ pufferlib/config/ocean/pong.ini | 6 +- pufferlib/pufferl.py | 32 +++- 3 files changed, 308 insertions(+), 11 deletions(-) create mode 100644 plottest.py diff --git a/plottest.py b/plottest.py new file mode 100644 index 000000000..cf09b4d40 --- /dev/null +++ b/plottest.py @@ -0,0 +1,281 @@ +from dash import Dash, html, dcc +import plotly.graph_objects as go +import numpy as np +import json + +# Global styling variables +FONT_FAMILY = 'Arial' +FONT_SIZE_TITLE = 28 +FONT_SIZE_AXIS = 22 +FONT_SIZE_TICK = 20 +FONT_SIZE_LEGEND = 18 +FONT_COLOR = '#f1f1f1' +PLOT_BG_COLOR = '#061a1a' +PAPER_BG_COLOR = '#061a1a' +LINE_WIDTH = 4 +LINE_COLORS = ["#0000b3", "#0010d9", "#0020ff", "#0040ff", "#0060ff", "#0080ff", "#009fff", "#00bfff", "#00ffff"][::-1] +#['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] + +TITLE_FONT = dict( + family=FONT_FAMILY, + size=FONT_SIZE_TITLE, + color=FONT_COLOR +) + +AXIS_FONT = dict( + family=FONT_FAMILY, + size=FONT_SIZE_AXIS, + color=FONT_COLOR +) + +TICK_FONT = dict( + family=FONT_FAMILY, + size=FONT_SIZE_TICK, + color=FONT_COLOR +) + +LEGEND_FONT = dict( + family=FONT_FAMILY, + size=FONT_SIZE_LEGEND, + color=FONT_COLOR +) + +def rgba(hex, alpha): + return f"rgba({int(hex[1:3], 16)}, {int(hex[3:5], 16)}, {int(hex[5:7], 16)}, {alpha})" + 
+def mean_conf(xx, yy): + x_min = min([min(x) for x in xx]) + x_max = max([max(x) for x in xx]) + y_min = min([min(y) for y in yy]) + y_max = max([max(y) for y in yy]) + + x = np.linspace(x_min, x_max, 100) + y_interps = np.stack([ + np.interp(x, x_, y_) for x_, y_ in zip(xx, yy)]) + + mean = np.mean(y_interps, axis=0) + std = np.std(y_interps, axis=0) + conf = 1.96 * std / np.sqrt(len(xx)) + + return x, mean, conf + +def figure(title='The Puffer Frontier Project', + xlabel='Uptime', ylabel='Score', + legend='Trial', xaxis_type='linear'): + fig = go.Figure() + fig.update_layout( + title=dict(text=title, font=TITLE_FONT), + xaxis=dict(title=dict(text=xlabel, font=AXIS_FONT), tickfont=TICK_FONT), + yaxis=dict(title=dict(text=ylabel, font=AXIS_FONT), tickfont=TICK_FONT), + xaxis_type=xaxis_type, + showlegend=True, + legend=dict(font=LEGEND_FONT), + plot_bgcolor=PLOT_BG_COLOR, + paper_bgcolor=PAPER_BG_COLOR, + width=1280, + height=720, + autosize=False + ) + fig.update_xaxes(showgrid=False) + fig.update_yaxes(showgrid=False) + return fig + +def plot_lines(fig, xx, yy): + for i, (x, y) in enumerate(zip(xx, yy)): + fig.add_trace( + go.Scatter( + x=x, + y=y, + mode='lines', + name=f'Trial {i+1}', + line=dict( + color=LINE_COLORS[i % len(LINE_COLORS)], + width=LINE_WIDTH + ) + ) + ) + +def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0): + mmin = min(c) + mmax = max(c) + #vals = [(c - mmin)/(mmax - mmin) for c in c] + #vals = [max(0.01, v) for v in vals] + + colors = [] + for e in c: + if e > 0.95*mmax: + v = 1.0 + elif e > 0.9*mmax: + v = 0.6 + elif e > 0.5*mmax: + v = 0.3 + else: + v = 0 + + colors.append(f'rgb(0, 0.5, {v})') + + #c = (np.array(c) - min(c))/(max(c) - min(c)) + fig.add_trace( + go.Scatter( + x=x, + y=y, + mode='markers', + name=legend, + marker=dict( + color=colors, + size=10 + ) + ) + ) + +def plot_group(fig, xx, yy, xlabel='Performance', legend='Trial', log_x=False, i=0): + x, mean, conf = mean_conf(xx, yy) + fig.add_trace( + go.Scatter( + 
x=np.concatenate([x, x[::-1]]), + y=np.concatenate([mean + conf, (mean - conf)[::-1]]), + fill='toself', + fillcolor=rgba(LINE_COLORS[i], 0.2), + line=dict( + color='rgba(255,255,255,0)', + width=LINE_WIDTH + ), + showlegend=False + ) + ) + fig.add_trace( + go.Scatter( + x=x, + y=mean, + mode='lines', + name=legend, + line=dict( + color=LINE_COLORS[i], + width=LINE_WIDTH + ) + ) + ) + +def pareto_points(steps, costs, scores): + pareto_steps = [] + pareto_costs = [] + pareto_scores = [] + max_score = max(scores) + for i in range(len(steps)): + if scores[i] < 0.25*max_score: + continue + + higher_score = [s for s in scores if s > scores[i]] + lower_steps = [s for s in steps if s < scores[i]] + lower_cost = [c for c in costs if c < costs[i]] + better = [scores[j] > scores[i] and + costs[j] < costs[i] and steps[j] < steps[i] + for j in range(len(scores))] + if not any(better): + pareto_steps.append(steps[i]) + pareto_costs.append(costs[i]) + pareto_scores.append(scores[i]) + + idxs = np.argsort(pareto_steps) + pareto_steps = [pareto_steps[i] for i in idxs] + pareto_costs = [pareto_costs[i] for i in idxs] + pareto_scores = [pareto_scores[i] for i in idxs] + return pareto_steps, pareto_costs, pareto_scores + +def load_seed_data(filename): + with open(filename, 'r') as f: + experiments = json.load(f) + + all_uptime = [] + all_perf = [] + for trial in experiments: + uptime = [] + perf = [] + for e in trial: + u = e['uptime'] + if 'environment/perf' not in e: + continue + + uptime.append(u) + perf.append(e['environment/perf']) + + all_uptime.append(uptime) + all_perf.append(perf) + + return all_uptime, all_perf + +def load_hyper_data(filename): + with open(filename, 'r') as f: + experiments = json.load(f) + + all_hyper = [] + all_perf = [] + for trial in experiments: + hyper = trial[-1]['learning_rate'] + perf = trial[-1]['environment/perf'] + all_hyper.append(hyper) + all_perf.append(perf) + + all_hyper = np.array(all_hyper).reshape(3, -1) + all_perf = 
np.array(all_perf).reshape(3, -1) + return all_hyper, all_perf + +def load_sweep_data(path): + import glob + costs = [] + steps = [] + scores = [] + for fpath in glob.glob(path): + with open(fpath, 'r') as f: + exp = json.load(f) + + cost = exp['cost'] + step = exp['total_timesteps'] + score = exp['data'][-1]['environment/score'] + costs.append(cost) + steps.append(step) + scores.append(score) + + return steps, costs, scores + + +def layout(): + fig1 = figure(title='Hyperparameter Ablation', xlabel='Learning Rate', legend='Ablate', xaxis_type='log') + #all_hyper, all_perf = load_hyper_data('puffer_pong_learning_rate.npz') + #plot_group(fig1, all_hyper, all_perf, legend='Pong') + #all_hyper, all_perf = load_hyper_data('puffer_breakout_learning_rate.npz') + #plot_group(fig1, all_hyper, all_perf, legend='Breakout', i=1) + + fig2 = figure(title='Seed Sensitivity', xlabel='Uptime', legend='Ablate') + #all_uptime, all_perf = load_seed_data('puffer_pong_seeds.npz') + #plot_group(fig2, all_uptime, all_perf, legend='Pong') + #all_uptime, all_perf = load_seed_data('puffer_breakout_seeds.npz') + #plot_group(fig2, all_uptime, all_perf, legend='Breakout', i=1) + #all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') + #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) + + #fig3 = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + fig3 = figure(title='Sweep', xlabel='Steps', ylabel='Score', legend='Trial') + steps, costs, scores = load_sweep_data('experiments/logs/puffer_pong/*.json') + #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) + #plot_lines(fig3, [pareto_steps], [pareto_costs]) + #scatter(fig3, steps, costs, scores, legend='Pong') + scatter(fig3, steps, scores, costs, legend='Pong') + layout = html.Div([ + html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), + dcc.Graph(figure=fig1), + html.Br(), + dcc.Graph(figure=fig2), + dcc.Graph(figure=fig3) + ]) + return layout + + + +# 
Initialize Dash app +app = Dash() + +# Set layout with static graph +app.layout = layout +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index 7ff0960b1..543fd68da 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -37,9 +37,9 @@ max_cost = 30 [sweep.train.total_timesteps] distribution = log_normal -min = 1e7 -max = 2e8 -mean = 8e7 +min = 1e5 +max = 2e7 +mean = 1e7 scale = auto [sweep.env.frameskip] diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 9799f724c..3f9fe8f70 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -167,7 +167,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Logging self.logger = logger if logger is None: - self.logger = NoLogger(config) + self.logger = Logger(config) # Learning rate scheduler epochs = config['total_timesteps'] // config['batch_size'] @@ -496,7 +496,8 @@ def close(self): self.utilization.stop() model_path = self.save_checkpoint() run_id = self.logger.run_id - path = os.path.join(self.config['data_dir'], f'{self.config["env"]}_{run_id}.pt') + path = os.path.join(self.config['data_dir'], + self.config["env"], f'{run_id}.pt') shutil.copy(model_path, path) return path @@ -506,7 +507,8 @@ def save_checkpoint(self): return run_id = self.logger.run_id - path = os.path.join(self.config['data_dir'], f'{self.config["env"]}_{run_id}') + path = os.path.join(self.config['data_dir'], + self.config["env"], run_id) if not os.path.exists(path): os.makedirs(path) @@ -795,15 +797,28 @@ def downsample(arr, m): downsampled = arr.reshape(m, -1).mean(axis=1) return np.concatenate([downsampled, [last]]) -class NoLogger: +class Logger: def __init__(self, args): - self.run_id = str(int(100*time.time())) + self.run_id = str(int(1000*time.time())) + root = os.path.join(args['data_dir'], 'logs', args['env']) + if not os.path.exists(root): + os.makedirs(root) + + self.path = 
os.path.join(root, self.run_id + '.json') + self.logs = {'data': []} + for k, v in pufferlib.unroll_nested_dict(args): + self.logs[k] = v def log(self, logs, step): - pass + self.logs['data'].append(logs) + + def log_cost(self, cost): + self.logs['cost'] = cost def close(self, model_path): - pass + import json + with open(self.path, 'w') as f: + json.dump(self.logs, f) class NeptuneLogger: def __init__(self, args, load_id=None, mode='async'): @@ -942,6 +957,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): #pufferl.print_dashboard() model_path = pufferl.close() + pufferl.logger.log_cost(uptime) pufferl.logger.close(model_path) return all_logs @@ -1119,7 +1135,7 @@ def sweep(args=None, env_name=None): all_logs = train(env_name, args=args) all_logs = [e for e in all_logs if target_key in e] scores = downsample([log[target_key] for log in all_logs], points_per_run) - costs = downsample([log['uptime'] for log in all_logs], points_per_run) + costs = downsample([log['agent_steps'] for log in all_logs], points_per_run) timesteps = downsample([log['agent_steps'] for log in all_logs], points_per_run) for score, cost, timestep in zip(scores, costs, timesteps): args['train']['total_timesteps'] = timestep From 40da9046dd601ea329403f2d1a5bc198418a5f40 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 16 Sep 2025 00:48:23 +0000 Subject: [PATCH 003/188] Stable pure sample eff multisweep --- plottest.py | 3 ++- pufferlib/config/default.ini | 16 +++++++-------- pufferlib/config/ocean/pong.ini | 17 ++++++++++++++++ pufferlib/models.py | 3 ++- pufferlib/pufferl.py | 36 +++++++++++++-------------------- 5 files changed, 43 insertions(+), 32 deletions(-) diff --git a/plottest.py b/plottest.py index cf09b4d40..5e7197350 100644 --- a/plottest.py +++ b/plottest.py @@ -230,7 +230,8 @@ def load_sweep_data(path): exp = json.load(f) cost = exp['cost'] - step = exp['total_timesteps'] + #step = exp['total_timesteps'] + step = exp['data'][-1]['agent_steps'] score 
= exp['data'][-1]['environment/score'] costs.append(cost) steps.append(step) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 0990fdb13..751e065db 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -32,7 +32,7 @@ total_timesteps = 10_000_000 learning_rate = 0.015 gamma = 0.995 gae_lambda = 0.90 -update_epochs = 1 +num_minibatches = 16 clip_coef = 0.2 vf_coef = 2.0 vf_clip_coef = 0.2 @@ -90,7 +90,7 @@ scale = auto [sweep.train.minibatch_size] distribution = uniform_pow2 -min = 8192 +min = 512 max = 65536 mean = 32768 scale = auto @@ -137,12 +137,12 @@ max = 5.0 mean = 1.0 scale = auto -#[sweep.train.update_epochs] -#distribution = int_uniform -#min = 1 -#max = 8 -#mean = 1 -#scale = 2.0 +[sweep.train.num_minibatches] +distribution = uniform_pow2 +min = 1 +max = 1024 +mean = 32 +scale = auto [sweep.train.clip_coef] distribution = uniform diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index 543fd68da..8ae46dd5b 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -4,6 +4,9 @@ env_name = puffer_pong policy_name = Policy rnn_name = Recurrent +[policy] +hidden_size = 128 + [vec] num_envs = 4 @@ -35,6 +38,13 @@ vtrace_rho_clip = 2.784150207139061 downsample = 0 max_cost = 30 +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + [sweep.train.total_timesteps] distribution = log_normal min = 1e5 @@ -42,6 +52,13 @@ max = 2e7 mean = 1e7 scale = auto +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto + [sweep.env.frameskip] distribution = int_uniform min = 1 diff --git a/pufferlib/models.py b/pufferlib/models.py index fa43d7071..b548a3e97 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -98,13 +98,14 @@ def decode_actions(self, hidden): return logits, values class LSTMWrapper(nn.Module): - def __init__(self, env, policy, input_size=128, 
hidden_size=128): + def __init__(self, env, policy, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the foot with bad transpose and shape operations. This saves much pain. Requires that your policy define encode_observations and decode_actions. See the Default policy for an example.''' super().__init__() self.obs_shape = env.single_observation_space.shape + input_size = hidden_size self.policy = policy self.input_size = input_size diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 3f9fe8f70..4651025af 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -115,17 +115,7 @@ def __init__(self, config, vecenv, policy, logger=None): minibatch_size = config['minibatch_size'] max_minibatch_size = config['max_minibatch_size'] self.minibatch_size = min(minibatch_size, max_minibatch_size) - if minibatch_size > max_minibatch_size and minibatch_size % max_minibatch_size != 0: - raise pufferlib.APIUsageError( - f'minibatch_size {minibatch_size} > max_minibatch_size {max_minibatch_size} must divide evenly') - - if batch_size < minibatch_size: - raise pufferlib.APIUsageError( - f'batch_size {batch_size} must be >= minibatch_size {minibatch_size}' - ) - self.accumulate_minibatches = max(1, minibatch_size // max_minibatch_size) - self.total_minibatches = int(config['update_epochs'] * batch_size / self.minibatch_size) self.minibatch_segments = self.minibatch_size // horizon if self.minibatch_segments * horizon != self.minibatch_size: raise pufferlib.APIUsageError( @@ -329,7 +319,8 @@ def train(self): anneal_beta = b0 + (1 - b0)*a*self.epoch/self.total_epochs self.ratio[:] = 1 - for mb in range(self.total_minibatches): + num_minibatches = config['num_minibatches'] + for mb in range(num_minibatches): profile('train_misc', epoch, nest=True) self.amp_context.__enter__() @@ -343,7 +334,8 @@ def train(self): adv = advantages.abs().sum(axis=1) prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) prio_probs = (prio_weights + 
1e-6)/(prio_weights.sum() + 1e-6) - idx = torch.multinomial(prio_probs, self.minibatch_segments) + idx = torch.multinomial(prio_probs, + self.minibatch_segments, replacement=True) mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta mb_obs = self.observations[idx] mb_actions = self.actions[idx] @@ -408,13 +400,13 @@ def train(self): # Logging profile('train_misc', epoch) - losses['policy_loss'] += pg_loss.item() / self.total_minibatches - losses['value_loss'] += v_loss.item() / self.total_minibatches - losses['entropy'] += entropy_loss.item() / self.total_minibatches - losses['old_approx_kl'] += old_approx_kl.item() / self.total_minibatches - losses['approx_kl'] += approx_kl.item() / self.total_minibatches - losses['clipfrac'] += clipfrac.item() / self.total_minibatches - losses['importance'] += ratio.mean().item() / self.total_minibatches + losses['policy_loss'] += pg_loss.item() / num_minibatches + losses['value_loss'] += v_loss.item() / num_minibatches + losses['entropy'] += entropy_loss.item() / num_minibatches + losses['old_approx_kl'] += old_approx_kl.item() / num_minibatches + losses['approx_kl'] += approx_kl.item() / num_minibatches + losses['clipfrac'] += clipfrac.item() / num_minibatches + losses['importance'] += ratio.mean().item() / num_minibatches # Learn on accumulated minibatches profile('learn', epoch) @@ -432,8 +424,8 @@ def train(self): y_pred = self.values.flatten() y_true = advantages.flatten() + self.values.flatten() var_y = y_true.var() - explained_var = torch.nan if var_y == 0 else 1 - (y_true - y_pred).var() / var_y - losses['explained_variance'] = explained_var.item() + explained_var = torch.nan if var_y == 0 else (1 - (y_true - y_pred).var() / var_y).item() + losses['explained_variance'] = explained_var profile.end() logs = None @@ -1205,7 +1197,7 @@ def load_policy(args, vecenv, env_name=''): rnn_name = args['rnn_name'] if rnn_name is not None: rnn_cls = getattr(env_module.torch, args['rnn_name']) - policy = 
rnn_cls(vecenv.driver_env, policy, **args['rnn']) + policy = rnn_cls(vecenv.driver_env, policy, **args['policy']) policy = policy.to(device) From c017b81365a6f93bee41ba36ca23d89d05b65fe8 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 18 Sep 2025 15:08:52 +0000 Subject: [PATCH 004/188] Compute data --- plottest.py | 2 +- pufferlib/config/default.ini | 8 --- pufferlib/config/ocean/breakout.ini | 43 +++++++++------ pufferlib/config/ocean/pong.ini | 2 +- pufferlib/pufferl.py | 86 ++++++++++++++++++++++++++++- pufferlib/sweep.py | 2 +- 6 files changed, 112 insertions(+), 31 deletions(-) diff --git a/plottest.py b/plottest.py index 5e7197350..b9df5520d 100644 --- a/plottest.py +++ b/plottest.py @@ -279,4 +279,4 @@ def layout(): # Set layout with static graph app.layout = layout if __name__ == '__main__': - app.run(host='0.0.0.0', port=8080) + app.run(host='0.0.0.0', port=8090) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 751e065db..546f90206 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -73,14 +73,6 @@ downsample = 10 #mean = 8 #scale = auto -# TODO: Elim from base -[sweep.train.total_timesteps] -distribution = log_normal -min = 5e7 -max = 1e10 -mean = 1e8 -scale = time - [sweep.train.bptt_horizon] distribution = uniform_pow2 min = 16 diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index d261503f5..b9302d85e 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -9,7 +9,7 @@ num_envs = 8 [env] num_envs = 1024 -frameskip = 4 +frameskip = 1 width = 576 height = 330 paddle_width = 62 @@ -26,14 +26,10 @@ paddle_speed = 620 continuous = 0 [policy] -hidden_size = 128 - -[rnn] -input_size = 128 -hidden_size = 128 +hidden_size = 512 [train] -total_timesteps = 90_000_000 +total_timesteps = 40_000_000 adam_beta1 = 0.8946507418260217 adam_beta2 = 0.9 adam_eps = 0.0001 @@ -53,16 +49,27 @@ vf_coef = 1.6832989594296321 vtrace_c_clip 
= 2.878171091654008 vtrace_rho_clip = 0.7876748061547312 -[sweep.train.total_timesteps] -distribution = log_normal -min = 3e7 -max = 2e8 -mean = 8e7 -scale = auto +[sweep] +downsample = 10 +max_cost = 240 + +#[sweep.train.total_timesteps] +#distribution = log_normal +#min = 1e6 +#max = 2e8 +#mean = 8e7 +#scale = auto -[sweep.env.frameskip] -distribution = int_uniform +#[sweep.policy.hidden_size] +#distribution = uniform_pow2 +#min = 16 +#max = 1024 +#mean = 128 +#scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 min = 1 -max = 8 -mean = 4 -scale = 2.0 +max = 4096 +mean = 2048 +scale = auto diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index 8ae46dd5b..6ce0d8909 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -47,7 +47,7 @@ scale = auto [sweep.train.total_timesteps] distribution = log_normal -min = 1e5 +min = 1e6 max = 2e7 mean = 1e7 scale = auto diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 4651025af..e3962eb62 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -160,7 +160,7 @@ def __init__(self, config, vecenv, policy, logger=None): self.logger = Logger(config) # Learning rate scheduler - epochs = config['total_timesteps'] // config['batch_size'] + epochs = max(1, config['total_timesteps'] // config['batch_size']) self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) self.total_epochs = epochs @@ -773,7 +773,7 @@ def stop(self): self.stopped = True def downsample(arr, m): - if len(arr) < m: + if len(arr) <= m: return arr if m == 0: @@ -1103,6 +1103,86 @@ def multisweep(args=None, env_name=None): sweep.suggest(args) worker_queues[w].put(args) +def paretosweep(args=None, env_name=None): + args = args or load_config(env_name) + sweep_gpus = args['sweep_gpus'] + if sweep_gpus == -1: + sweep_gpus = torch.cuda.device_count() + + method = args['sweep'].pop('method') + try: + sweep_cls = getattr(pufferlib.sweep, method) + 
except: + raise pufferlib.APIUsageError(f'Invalid sweep method {method}. See pufferlib.sweep') + + total_timesteps = args['sweep']['train'].pop('total_timesteps') + mmin = total_timesteps['min'] + mmax = total_timesteps['max'] + all_timesteps = np.geomspace(mmin, mmax, sweep_gpus) + # You hardcoded buffer size to 5 instead of 10 for this + sweeps = [sweep_cls(args['sweep']) for _ in range(sweep_gpus)] + points_per_run = args['sweep']['downsample'] + target_key = f'environment/{args["sweep"]["metric"]}' + + from multiprocessing import Process, Queue, set_start_method + from copy import deepcopy + + host_queues = [] + worker_queues = [] + workers = [] + worker_args = [] + set_start_method('spawn') + for i in range(sweep_gpus): + q_host = Queue() + q_worker = Queue() + w = Process( + target=_sweep_worker, + args=(env_name, q_host, q_worker, f'cuda:{i}') + ) + w.start() + host_queues.append(q_host) + worker_queues.append(q_worker) + args = deepcopy(args) + worker_args.append(args) + + for w in range(sweep_gpus): + args = worker_args[w] + sweeps[w].suggest(args) + args['train']['total_timesteps'] = all_timesteps[w] + worker_queues[w].put(args) + + runs = 0 + + suggestion = deepcopy(args) + while runs < args['max_runs']: + for w in range(sweep_gpus): + args = worker_args[w] + if host_queues[w].empty(): + continue + + all_logs = host_queues[w].get(timeout=0) + if not all_logs: + continue + + all_logs = [e for e in all_logs if target_key in e] + scores = downsample([log[target_key] for log in all_logs], points_per_run) + times = downsample([log['uptime'] for log in all_logs], points_per_run) + steps = downsample([log['agent_steps'] for log in all_logs], points_per_run) + #costs = np.stack([times, steps], axis=1) + costs = times + timesteps = [log['agent_steps'] for log in all_logs] + timesteps = downsample(timesteps, points_per_run) + for score, cost, timestep in zip(scores, costs, timesteps): + args['train']['total_timesteps'] = timestep + sweeps[w].observe(args, score, 
cost) + + runs += 1 + + sweeps[w].suggest(args) + args['train']['total_timesteps'] = all_timesteps[w] + worker_queues[w].put(args) + + def sweep(args=None, env_name=None): args = args or load_config(env_name) if not args['wandb'] and not args['neptune']: @@ -1317,6 +1397,8 @@ def main(): sweep(env_name=env_name) elif mode == 'multisweep': multisweep(env_name=env_name) + elif mode == 'paretosweep': + paretosweep(env_name=env_name) elif mode == 'autotune': autotune(env_name=env_name) elif mode == 'profile': diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 28e7f1c8e..61c355072 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -337,7 +337,7 @@ def __init__(self, suggestions_per_pareto = 256, seed_with_search_center = True, expansion_rate = 0.25, - buffer_size = 10, + buffer_size = 5, max_cost = 30, ): self.hyperparameters = Hyperparameters(sweep_config) From b028b77902f15825f0b403db725080686ecebc09 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 19 Sep 2025 22:48:35 +0000 Subject: [PATCH 005/188] stable --- plottest.py | 20 +++++++++++++++++--- pufferlib/config/ocean/pong.ini | 25 ++++++++++++++++++++++++- pufferlib/pufferl.py | 12 +++++++++--- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/plottest.py b/plottest.py index b9df5520d..4e40aaa33 100644 --- a/plottest.py +++ b/plottest.py @@ -230,9 +230,22 @@ def load_sweep_data(path): exp = json.load(f) cost = exp['cost'] + #cost = exp['train/num_minibatches'] + #cost = exp['env/frameskip'] #step = exp['total_timesteps'] step = exp['data'][-1]['agent_steps'] score = exp['data'][-1]['environment/score'] + if score < 20: + continue + + if score > 20 and cost < 15 and step > 20e6: + for kk, vv in exp.items(): + if isinstance(vv, dict): + for k, v in vv.items(): + print(kk, k, '=', v) + else: + print(kk, '=', vv) + costs.append(cost) steps.append(step) scores.append(score) @@ -256,12 +269,13 @@ def layout(): #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) #fig3 = 
figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - fig3 = figure(title='Sweep', xlabel='Steps', ylabel='Score', legend='Trial') + fig3 = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') steps, costs, scores = load_sweep_data('experiments/logs/puffer_pong/*.json') #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) #plot_lines(fig3, [pareto_steps], [pareto_costs]) #scatter(fig3, steps, costs, scores, legend='Pong') - scatter(fig3, steps, scores, costs, legend='Pong') + #scatter(fig3, steps, scores, costs, legend='Pong') + scatter(fig3, steps, costs, scores, legend='Pong') layout = html.Div([ html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), dcc.Graph(figure=fig1), @@ -279,4 +293,4 @@ def layout(): # Set layout with static graph app.layout = layout if __name__ == '__main__': - app.run(host='0.0.0.0', port=8090) + app.run(host='0.0.0.0', port=8000) diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index 6ce0d8909..aeedacad7 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -11,7 +11,7 @@ hidden_size = 128 num_envs = 4 [env] -num_envs = 1024 +num_envs = 1024 frameskip = 8 [train] @@ -27,6 +27,7 @@ gamma = 0.9608378504980243 learning_rate = 0.07109386062895108 max_grad_norm = 1.7820203601055993 minibatch_size = 32768 +num_minibatches = 8 prio_alpha = 0.09999999999999998 prio_beta0 = 0.7475661360032159 vf_clip_coef = 2.7025841941932303 @@ -34,6 +35,28 @@ vf_coef = 1.9960893747329385 vtrace_c_clip = 1.0873122745787867 vtrace_rho_clip = 2.784150207139061 +#total_timesteps = 20000000.0 +#learning_rate = 0.08878791349515394 +#gamma = 0.9354145180237635 +#gae_lambda = 0.9020935398076688 +#num_minibatches = 32 +#clip_coef = 0.5882777043345978 +#vf_coef = 4.196442104147645 +#vf_clip_coef = 0.265385659520976 +#max_grad_norm = 0.3661413663411234 +#ent_coef = 0.0011560317997450196 +#adam_beta1 = 0.9462393585831101 +#adam_beta2 = 
0.9667417156941432 +#adam_eps = 1.1005478999774079e-09 +#minibatch_size = 65536 +#max_minibatch_size = 32768 +#bptt_horizon = 64 +#vtrace_rho_clip = 1.8180933155594725 +#vtrace_c_clip = 1.4235484929825957 +#prio_alpha = 0.9553779337727483 +#prio_beta0 = 0.7125182812602482 + + [sweep] downsample = 0 max_cost = 30 diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index e3962eb62..d9cc6bf4e 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -188,7 +188,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Dashboard self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) - #self.print_dashboard(clear=True) + self.print_dashboard(clear=True) @property def uptime(self): @@ -434,7 +434,7 @@ def train(self): if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() self.losses = losses - #self.print_dashboard() + self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step @@ -801,6 +801,11 @@ def __init__(self, args): for k, v in pufferlib.unroll_nested_dict(args): self.logs[k] = v + # Temp hack to log full config + def init(self, args): + for k, v in pufferlib.unroll_nested_dict(args): + self.logs[k] = v + def log(self, logs, step): self.logs['data'].append(logs) @@ -913,6 +918,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): train_config = dict(**args['train'], env=env_name) pufferl = PuffeRL(train_config, vecenv, policy, logger) + pufferl.logger.init(args) all_logs = [] while pufferl.global_step < train_config['total_timesteps']: @@ -947,7 +953,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): if logs is not None: all_logs.append(logs) - #pufferl.print_dashboard() + pufferl.print_dashboard() model_path = pufferl.close() pufferl.logger.log_cost(uptime) pufferl.logger.close(model_path) From 2d11375262feca0bf8f0d7521981dd122730e272 Mon Sep 17 
00:00:00 2001 From: Joseph Suarez Date: Fri, 19 Sep 2025 23:17:07 +0000 Subject: [PATCH 006/188] Fix sweep len --- plottest.py | 2 +- pufferlib/config/ocean/breakout.ini | 31 ++++++++++++++++++----------- pufferlib/config/ocean/pong.ini | 4 ++-- pufferlib/pufferl.py | 3 ++- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/plottest.py b/plottest.py index 4e40aaa33..bc1990210 100644 --- a/plottest.py +++ b/plottest.py @@ -238,7 +238,7 @@ def load_sweep_data(path): if score < 20: continue - if score > 20 and cost < 15 and step > 20e6: + if score > 20 and cost < 15 and step < 2e6: for kk, vv in exp.items(): if isinstance(vv, dict): for k, v in vv.items(): diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index b9302d85e..55600e585 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -53,19 +53,19 @@ vtrace_rho_clip = 0.7876748061547312 downsample = 10 max_cost = 240 -#[sweep.train.total_timesteps] -#distribution = log_normal -#min = 1e6 -#max = 2e8 -#mean = 8e7 -#scale = auto +[sweep.train.total_timesteps] +distribution = log_normal +min = 1e6 +max = 1e8 +mean = 5e7 +scale = auto -#[sweep.policy.hidden_size] -#distribution = uniform_pow2 -#min = 16 -#max = 1024 -#mean = 128 -#scale = auto +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto [sweep.env.num_envs] distribution = uniform_pow2 @@ -73,3 +73,10 @@ min = 1 max = 4096 mean = 2048 scale = auto + +[sweep.env.frameskip] +distribution = int_uniform +min = 1 +max = 8 +mean = 4 +scale = 2.0 diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index aeedacad7..3d7351c6a 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -70,8 +70,8 @@ scale = auto [sweep.train.total_timesteps] distribution = log_normal -min = 1e6 -max = 2e7 +min = 5e5 +max = 12e6 mean = 1e7 scale = auto diff --git a/pufferlib/pufferl.py 
b/pufferlib/pufferl.py index d9cc6bf4e..e86b455bc 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1188,6 +1188,7 @@ def paretosweep(args=None, env_name=None): args['train']['total_timesteps'] = all_timesteps[w] worker_queues[w].put(args) + print('Done') def sweep(args=None, env_name=None): args = args or load_config(env_name) @@ -1328,7 +1329,7 @@ def load_config(env_name): parser.add_argument('--save-frames', type=int, default=0) parser.add_argument('--gif-path', type=str, default='eval.gif') parser.add_argument('--fps', type=float, default=15) - parser.add_argument('--max-runs', type=int, default=200, help='Max number of sweep runs') + parser.add_argument('--max-runs', type=int, default=1200, help='Max number of sweep runs') parser.add_argument('--wandb', action='store_true', help='Use wandb for logging') parser.add_argument('--wandb-project', type=str, default='pufferlib') parser.add_argument('--wandb-group', type=str, default='debug') From 337f52058aefbefcaafa39f024fb8a5e04d52f70 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 19 Sep 2025 23:18:38 +0000 Subject: [PATCH 007/188] hide dash --- pufferlib/pufferl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index e86b455bc..f65880c3c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -188,7 +188,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Dashboard self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) - self.print_dashboard(clear=True) + #self.print_dashboard(clear=True) @property def uptime(self): @@ -434,7 +434,7 @@ def train(self): if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() self.losses = losses - self.print_dashboard() + #self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step @@ -953,7 +953,7 @@ def train(env_name, 
args=None, vecenv=None, policy=None, logger=None): if logs is not None: all_logs.append(logs) - pufferl.print_dashboard() + #pufferl.print_dashboard() model_path = pufferl.close() pufferl.logger.log_cost(uptime) pufferl.logger.close(model_path) From 95ba0052b152ce055db5283c3aea32628677e469 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 20 Sep 2025 21:28:38 +0000 Subject: [PATCH 008/188] Upgrade vis --- plottest.py | 108 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/plottest.py b/plottest.py index bc1990210..cf791f735 100644 --- a/plottest.py +++ b/plottest.py @@ -103,15 +103,7 @@ def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0): colors = [] for e in c: - if e > 0.95*mmax: - v = 1.0 - elif e > 0.9*mmax: - v = 0.6 - elif e > 0.5*mmax: - v = 0.3 - else: - v = 0 - + v = (e - mmin)/(mmax - mmin) colors.append(f'rgb(0, 0.5, {v})') #c = (np.array(c) - min(c))/(max(c) - min(c)) @@ -222,45 +214,31 @@ def load_hyper_data(filename): def load_sweep_data(path): import glob - costs = [] - steps = [] - scores = [] + experiments = [] for fpath in glob.glob(path): with open(fpath, 'r') as f: exp = json.load(f) - cost = exp['cost'] - #cost = exp['train/num_minibatches'] - #cost = exp['env/frameskip'] - #step = exp['total_timesteps'] - step = exp['data'][-1]['agent_steps'] - score = exp['data'][-1]['environment/score'] - if score < 20: - continue - - if score > 20 and cost < 15 and step < 2e6: - for kk, vv in exp.items(): - if isinstance(vv, dict): - for k, v in vv.items(): - print(kk, k, '=', v) - else: - print(kk, '=', vv) - - costs.append(cost) - steps.append(step) - scores.append(score) + data = {} + for kk, vv in exp.items(): + if kk == 'data': + for k, v in exp[kk][-1].items(): + data[k] = v + else: + data[kk] = vv - return steps, costs, scores + experiments.append(data) + return experiments def layout(): - fig1 = figure(title='Hyperparameter Ablation', xlabel='Learning Rate', 
legend='Ablate', xaxis_type='log') + #fig1 = figure(title='Hyperparameter Ablation', xlabel='Learning Rate', legend='Ablate', xaxis_type='log') #all_hyper, all_perf = load_hyper_data('puffer_pong_learning_rate.npz') #plot_group(fig1, all_hyper, all_perf, legend='Pong') #all_hyper, all_perf = load_hyper_data('puffer_breakout_learning_rate.npz') #plot_group(fig1, all_hyper, all_perf, legend='Breakout', i=1) - fig2 = figure(title='Seed Sensitivity', xlabel='Uptime', legend='Ablate') + #fig2 = figure(title='Seed Sensitivity', xlabel='Uptime', legend='Ablate') #all_uptime, all_perf = load_seed_data('puffer_pong_seeds.npz') #plot_group(fig2, all_uptime, all_perf, legend='Pong') #all_uptime, all_perf = load_seed_data('puffer_breakout_seeds.npz') @@ -268,20 +246,62 @@ def layout(): #all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) - #fig3 = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - fig3 = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - steps, costs, scores = load_sweep_data('experiments/logs/puffer_pong/*.json') + experiments = load_sweep_data('experiments/logs/puffer_pong/*.json') + steps = [e['agent_steps'] for e in experiments] + costs = [e['cost'] for e in experiments] + scores = [e['environment/score'] for e in experiments] + + # Filter outliers + idxs = [i for i, s in enumerate(steps) if s < 1e6] + experiments = [experiments[i] for i in idxs] + steps = [steps[i] for i in idxs] + costs = [costs[i] for i in idxs] + scores = [scores[i] for i in idxs] + + # Adjust steps + has_skip = ['env/frameskip' in e for e in experiments] + if any(has_skip): + experiments = [e for e, s in zip(experiments, has_skip) if s] + steps = [e['env/frameskip']*e['agent_steps'] for e, s + in zip(experiments, has_skip) if s] + costs = [e['cost'] for e, s in zip(experiments, has_skip) if s] + scores = [e['environment/score'] for e, s in zip(experiments, has_skip) 
if s] + + # Filter by score + max_score = max(scores) + idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] + filtered_steps = [steps[i] for i in idxs] + filtered_costs = [costs[i] for i in idxs] + filtered_scores = [scores[i] for i in idxs] + + # Header plot + frontier = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + scatter(frontier, filtered_steps, filtered_costs, filtered_scores, legend='Pong') + + figs = [] + hypers = [ + 'train/learning_rate', + 'train/num_minibatches', + 'policy/hidden_size', + 'env/frameskip', + ] + for hyper in hypers: + f = figure(title=hyper, xlabel=hyper, ylabel='Score', legend='Ablate') + idxs = [i for i, e in enumerate(experiments) if hyper in e] + v = [experiments[i][hyper] for i in idxs] + s = [scores[i] for i in idxs] + ss = [np.log(steps[i]) for i in idxs] + c = [costs[i] for i in idxs] + scatter(f, v, s, ss, legend='Pong') + figs.append(f) + #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) #plot_lines(fig3, [pareto_steps], [pareto_costs]) - #scatter(fig3, steps, costs, scores, legend='Pong') - #scatter(fig3, steps, scores, costs, legend='Pong') - scatter(fig3, steps, costs, scores, legend='Pong') layout = html.Div([ html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), - dcc.Graph(figure=fig1), + dcc.Graph(figure=frontier), html.Br(), - dcc.Graph(figure=fig2), - dcc.Graph(figure=fig3) + *[dcc.Graph(figure=f) for f in figs] ]) return layout From 10d96f0d025bfbc429a26b2ad051a58db0e26de8 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 20 Sep 2025 21:36:30 +0000 Subject: [PATCH 009/188] breakout config --- pufferlib/config/ocean/breakout.ini | 71 +++++++++++++++-------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 55600e585..2eeb5796e 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -5,10 +5,12 @@ 
policy_name = Policy rnn_name = Recurrent [vec] -num_envs = 8 +#num_envs = 8 +num_envs = 1 [env] -num_envs = 1024 +#num_envs = 64 +num_envs = 512 frameskip = 1 width = 576 height = 330 @@ -30,42 +32,43 @@ hidden_size = 512 [train] total_timesteps = 40_000_000 -adam_beta1 = 0.8946507418260217 -adam_beta2 = 0.9 -adam_eps = 0.0001 +adam_beta1 = 0.9389740236912132 +adam_beta2 = 0.9998225039929157 +adam_eps = 1.0267361590791064e-8 batch_size = auto bptt_horizon = 64 -clip_coef = 0.19696765958267629 -ent_coef = 0.0005690816545012474 -gae_lambda = 0.747650023961198 +clip_coef = 0.01557913923814178 +ent_coef = 0.0031759371032913 +gae_lambda = 0.916681264452842 gamma = 0.9997053654668936 -learning_rate = 0.044482546441415506 -max_grad_norm = 2.2356112188495723 -minibatch_size = 32768 -prio_alpha = 0.98967001208896 -prio_beta0 = 0.09999999999999998 -vf_clip_coef = 2.178492167689251 -vf_coef = 1.6832989594296321 +learning_rate = 0.012744235594115342 +max_grad_norm = 1.8013800046071862 +num_minibatches = 8 +minibatch_size = 4096 +prio_alpha = 0.9500430793857082 +prio_beta0 = 0.9436845548994959 +vf_clip_coef = 0.1 +vf_coef = 2.5994729835919834 vtrace_c_clip = 2.878171091654008 -vtrace_rho_clip = 0.7876748061547312 +vtrace_rho_clip = 1.3235791596831579 [sweep] downsample = 10 max_cost = 240 -[sweep.train.total_timesteps] -distribution = log_normal -min = 1e6 -max = 1e8 -mean = 5e7 -scale = auto +#[sweep.train.total_timesteps] +#distribution = log_normal +#min = 1e6 +#max = 1e8 +#mean = 5e7 +#scale = auto -[sweep.policy.hidden_size] -distribution = uniform_pow2 -min = 16 -max = 1024 -mean = 128 -scale = auto +#[sweep.policy.hidden_size] +#distribution = uniform_pow2 +#min = 16 +#max = 1024 +#mean = 128 +#scale = auto [sweep.env.num_envs] distribution = uniform_pow2 @@ -74,9 +77,9 @@ max = 4096 mean = 2048 scale = auto -[sweep.env.frameskip] -distribution = int_uniform -min = 1 -max = 8 -mean = 4 -scale = 2.0 +#[sweep.env.frameskip] +#distribution = int_uniform +#min = 1 +#max = 8 
+#mean = 4 +#scale = 2.0 From cded41d042f6cf60ae97b03076449d1af146603c Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 23 Sep 2025 16:49:14 +0000 Subject: [PATCH 010/188] Eff sweep pong --- pufferlib/config/ocean/pong.ini | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index 3d7351c6a..1d3cf28e6 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -5,17 +5,17 @@ policy_name = Policy rnn_name = Recurrent [policy] -hidden_size = 128 +hidden_size = 512 [vec] num_envs = 4 [env] num_envs = 1024 -frameskip = 8 +frameskip = 4 [train] -total_timesteps = 12_000_000 +total_timesteps = 500_000 adam_beta1 = 0.9766295300012044 adam_beta2 = 0.9998113167362397 adam_eps = 6.301709731262074e-9 @@ -59,7 +59,7 @@ vtrace_rho_clip = 2.784150207139061 [sweep] downsample = 0 -max_cost = 30 +max_cost = 60 [sweep.policy.hidden_size] distribution = uniform_pow2 @@ -68,12 +68,12 @@ max = 1024 mean = 128 scale = auto -[sweep.train.total_timesteps] -distribution = log_normal -min = 5e5 -max = 12e6 -mean = 1e7 -scale = auto +#[sweep.train.total_timesteps] +#distribution = log_normal +#min = 5e5 +#max = 12e6 +#mean = 1e7 +#scale = auto [sweep.env.num_envs] distribution = uniform_pow2 From a9be3df53ea525d7e60d8903d215f266f036c8b3 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 23 Sep 2025 17:43:16 +0000 Subject: [PATCH 011/188] Fix hacky max cost --- pufferlib/sweep.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 61c355072..43b558734 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -338,7 +338,7 @@ def __init__(self, seed_with_search_center = True, expansion_rate = 0.25, buffer_size = 5, - max_cost = 30, + max_cost = -1, ): self.hyperparameters = Hyperparameters(sweep_config) self.num_random_samples = num_random_samples @@ -349,6 +349,8 @@ def 
__init__(self, self.resample_frequency = resample_frequency self.max_suggestion_cost = max_suggestion_cost self.expansion_rate = expansion_rate + self.max_cost = max_cost + self.buffer_size = buffer_size self.buffer = [] @@ -455,8 +457,12 @@ def suggest(self, fill): suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * ( gp_y_norm*weight) - mask = gp_c > 30 - suggestion_scores[mask.squeeze()] = -1e8 + + # Mask out high cost samples + # These tend to correlate with overconfident predictions + if self.max_cost > 0: + mask = gp_c > self.max_cost + suggestion_scores[mask.squeeze()] = -1e8 idxs = np.argsort(suggestion_scores)[::-1][:self.buffer_size] best_idx = idxs[0] From 2e8b02034f33ca726a058441ca48bbac89b768b2 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 23 Sep 2025 18:24:02 +0000 Subject: [PATCH 012/188] Simple plot --- pufferlib/ocean/plot/data.csv | 6 +++ pufferlib/ocean/plot/plot.c | 95 +++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 pufferlib/ocean/plot/data.csv create mode 100644 pufferlib/ocean/plot/plot.c diff --git a/pufferlib/ocean/plot/data.csv b/pufferlib/ocean/plot/data.csv new file mode 100644 index 000000000..8275c76d4 --- /dev/null +++ b/pufferlib/ocean/plot/data.csv @@ -0,0 +1,6 @@ +x,y +0.0,0.0 +1.0,1.0 +2.0,4.0 +3.0,9.0 +4.0,16.0 diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c new file mode 100644 index 000000000..6fad11f22 --- /dev/null +++ b/pufferlib/ocean/plot/plot.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include "raylib.h" + +int main(void) { + // Read CSV file + FILE *fp = fopen("pufferlib/ocean/plot/data.csv", "r"); + if (!fp) { + printf("Failed to open data.csv\n"); + return 1; + } + + // Skip header line + char line[1024]; + if (!fgets(line, sizeof(line), fp)) { + printf("Failed to read header\n"); + fclose(fp); + return 1; + } + + // Count lines for number of points + int num_points = 0; + long file_pos = ftell(fp); + while 
(fgets(line, sizeof(line), fp)) num_points++; + rewind(fp); + fseek(fp, file_pos, SEEK_SET); // Reset to after header + + if (num_points == 0) { + printf("No data points\n"); + fclose(fp); + return 1; + } + + float *x = malloc(num_points * sizeof(float)); + float *y = malloc(num_points * sizeof(float)); + int i = 0; + while (fgets(line, sizeof(line), fp)) { + char *token = strtok(line, ","); + if (token) x[i] = atof(token); + token = strtok(NULL, ","); + if (token) y[i] = atof(token); + i++; + } + fclose(fp); + + // Find min/max for scaling + float min_x = x[0], max_x = x[0], min_y = y[0], max_y = y[0]; + for (int j = 1; j < num_points; j++) { + if (x[j] < min_x) min_x = x[j]; + if (x[j] > max_x) max_x = x[j]; + if (y[j] < min_y) min_y = y[j]; + if (y[j] > max_y) max_y = y[j]; + } + float dx = max_x - min_x; + float dy = max_y - min_y; + if (dx == 0) dx = 1.0f; + if (dy == 0) dy = 1.0f; + min_x -= 0.1f * dx; max_x += 0.1f * dx; + min_y -= 0.1f * dy; max_y += 0.1f * dy; + dx = max_x - min_x; + dy = max_y - min_y; + + // Initialize Raylib + const int screenWidth = 800; + const int screenHeight = 600; + const int margin = 50; + InitWindow(screenWidth, screenHeight, "CSV Data Plot"); + SetTargetFPS(60); + + while (!WindowShouldClose()) { + BeginDrawing(); + ClearBackground(RAYWHITE); + + // Draw axes + DrawLine(margin, margin, margin, screenHeight - margin, BLACK); // Y-axis + DrawLine(margin, screenHeight - margin, screenWidth - margin, screenHeight - margin, BLACK); // X-axis + + // Plot lines + for (int j = 0; j < num_points - 1; j++) { + float px1 = margin + (x[j] - min_x) / dx * (screenWidth - 2 * margin); + float py1 = (screenHeight - margin) - (y[j] - min_y) / dy * (screenHeight - 2 * margin); + float px2 = margin + (x[j + 1] - min_x) / dx * (screenWidth - 2 * margin); + float py2 = (screenHeight - margin) - (y[j + 1] - min_y) / dy * (screenHeight - 2 * margin); + DrawLine(px1, py1, px2, py2, BLUE); + } + + EndDrawing(); + } + + free(x); + free(y); + 
CloseWindow(); + return 0; +} From 51bfd91faa183bec4444962b3a329e96d6896b14 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 23 Sep 2025 18:37:11 +0000 Subject: [PATCH 013/188] breakout config --- plottest.py | 31 ++++++++++++++++++++----- pufferlib/config/ocean/breakout.ini | 36 ++++++++++++++--------------- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/plottest.py b/plottest.py index cf791f735..e1fd61b99 100644 --- a/plottest.py +++ b/plottest.py @@ -246,17 +246,19 @@ def layout(): #all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) - experiments = load_sweep_data('experiments/logs/puffer_pong/*.json') + experiments = load_sweep_data('experiments/logs/puffer_breakout/*.json') steps = [e['agent_steps'] for e in experiments] costs = [e['cost'] for e in experiments] scores = [e['environment/score'] for e in experiments] # Filter outliers + ''' idxs = [i for i, s in enumerate(steps) if s < 1e6] experiments = [experiments[i] for i in idxs] steps = [steps[i] for i in idxs] costs = [costs[i] for i in idxs] scores = [scores[i] for i in idxs] + ''' # Adjust steps has_skip = ['env/frameskip' in e for e in experiments] @@ -269,21 +271,38 @@ def layout(): # Filter by score max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] + idxs = [i for i, s in enumerate(scores) if s > 0.0*max_score] filtered_steps = [steps[i] for i in idxs] filtered_costs = [costs[i] for i in idxs] filtered_scores = [scores[i] for i in idxs] # Header plot - frontier = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - scatter(frontier, filtered_steps, filtered_costs, filtered_scores, legend='Pong') + frontier = figure(title='Sweep', xlabel='Costs', ylabel='Scores', legend='Trial') + scatter(frontier, filtered_costs, filtered_scores, filtered_steps, legend='Breakout') figs = [] hypers = [ 'train/learning_rate', + 'train/ent_coef', + 
'train/gamma', + 'train/gae_lambda', + 'train/vtrace_rho_clip', + 'train/vtrace_c_clip', + 'train/clip_coef', + 'train/vf_clip_coef', + 'train/vf_coef', + 'train/max_grad_norm', + 'train/adam_beta1', + 'train/adam_beta2', + 'train/adam_eps', + 'train/prio_alpha', + 'train/prio_beta0', + 'train/bptt_horizon', 'train/num_minibatches', + 'train/minibatch_size', 'policy/hidden_size', 'env/frameskip', + 'env/num_envs', ] for hyper in hypers: f = figure(title=hyper, xlabel=hyper, ylabel='Score', legend='Ablate') @@ -292,7 +311,7 @@ def layout(): s = [scores[i] for i in idxs] ss = [np.log(steps[i]) for i in idxs] c = [costs[i] for i in idxs] - scatter(f, v, s, ss, legend='Pong') + scatter(f, v, s, ss, legend='Breakout') figs.append(f) #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) @@ -313,4 +332,4 @@ def layout(): # Set layout with static graph app.layout = layout if __name__ == '__main__': - app.run(host='0.0.0.0', port=8000) + app.run(host='0.0.0.0', port=8090) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 2eeb5796e..fbe9c6722 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -56,19 +56,19 @@ vtrace_rho_clip = 1.3235791596831579 downsample = 10 max_cost = 240 -#[sweep.train.total_timesteps] -#distribution = log_normal -#min = 1e6 -#max = 1e8 -#mean = 5e7 -#scale = auto +[sweep.train.total_timesteps] +distribution = log_normal +min = 1e6 +max = 1e8 +mean = 5e7 +scale = auto -#[sweep.policy.hidden_size] -#distribution = uniform_pow2 -#min = 16 -#max = 1024 -#mean = 128 -#scale = auto +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto [sweep.env.num_envs] distribution = uniform_pow2 @@ -77,9 +77,9 @@ max = 4096 mean = 2048 scale = auto -#[sweep.env.frameskip] -#distribution = int_uniform -#min = 1 -#max = 8 -#mean = 4 -#scale = 2.0 +[sweep.env.frameskip] +distribution = int_uniform +min = 1 +max = 
8 +mean = 4 +scale = 2.0 From 41e241530efc0137c7cf3449f1fec89db86a5017 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 24 Sep 2025 03:26:54 +0000 Subject: [PATCH 014/188] Basic plots --- plottest.py | 64 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/plottest.py b/plottest.py index e1fd61b99..4c2b367ba 100644 --- a/plottest.py +++ b/plottest.py @@ -246,7 +246,9 @@ def layout(): #all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) - experiments = load_sweep_data('experiments/logs/puffer_breakout/*.json') + env_name = 'pong' + + experiments = load_sweep_data(f'experiments/logs/puffer_{env_name}/*.json') steps = [e['agent_steps'] for e in experiments] costs = [e['cost'] for e in experiments] scores = [e['environment/score'] for e in experiments] @@ -271,38 +273,44 @@ def layout(): # Filter by score max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > 0.0*max_score] + idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] filtered_steps = [steps[i] for i in idxs] filtered_costs = [costs[i] for i in idxs] filtered_scores = [scores[i] for i in idxs] # Header plot - frontier = figure(title='Sweep', xlabel='Costs', ylabel='Scores', legend='Trial') - scatter(frontier, filtered_costs, filtered_scores, filtered_steps, legend='Breakout') + step_cost = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + scatter(step_cost, filtered_steps, filtered_costs, filtered_scores, legend=env_name) + + step_score = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') + scatter(step_score, steps, scores, costs, legend=env_name) + + cost_score = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') + scatter(cost_score, costs, scores, steps, legend=env_name) figs = [] hypers = [ - 'train/learning_rate', - 'train/ent_coef', - 'train/gamma', - 
'train/gae_lambda', - 'train/vtrace_rho_clip', - 'train/vtrace_c_clip', - 'train/clip_coef', - 'train/vf_clip_coef', - 'train/vf_coef', - 'train/max_grad_norm', - 'train/adam_beta1', - 'train/adam_beta2', - 'train/adam_eps', - 'train/prio_alpha', - 'train/prio_beta0', - 'train/bptt_horizon', - 'train/num_minibatches', - 'train/minibatch_size', - 'policy/hidden_size', - 'env/frameskip', - 'env/num_envs', + #'train/learning_rate', + #'train/ent_coef', + #'train/gamma', + #'train/gae_lambda', + #'train/vtrace_rho_clip', + #'train/vtrace_c_clip', + #'train/clip_coef', + #'train/vf_clip_coef', + #'train/vf_coef', + #'train/max_grad_norm', + #'train/adam_beta1', + #'train/adam_beta2', + #'train/adam_eps', + #'train/prio_alpha', + #'train/prio_beta0', + #'train/bptt_horizon', + #'train/num_minibatches', + #'train/minibatch_size', + #'policy/hidden_size', + #'env/frameskip', + #'env/num_envs', ] for hyper in hypers: f = figure(title=hyper, xlabel=hyper, ylabel='Score', legend='Ablate') @@ -311,14 +319,16 @@ def layout(): s = [scores[i] for i in idxs] ss = [np.log(steps[i]) for i in idxs] c = [costs[i] for i in idxs] - scatter(f, v, s, ss, legend='Breakout') + scatter(f, v, s, ss, legend=env_name) figs.append(f) #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) #plot_lines(fig3, [pareto_steps], [pareto_costs]) layout = html.Div([ html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), - dcc.Graph(figure=frontier), + dcc.Graph(figure=step_cost), + dcc.Graph(figure=step_score), + dcc.Graph(figure=cost_score), html.Br(), *[dcc.Graph(figure=f) for f in figs] ]) From 9f017aab3879bc9b91a622531474106c0a760250 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 24 Sep 2025 03:28:56 +0000 Subject: [PATCH 015/188] small fix --- plottest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plottest.py b/plottest.py index 4c2b367ba..9423623a9 100644 --- a/plottest.py +++ b/plottest.py @@ -104,6 +104,8 @@ def scatter(fig, x, y, c, 
legend='Trial', log_x=False, i=0): colors = [] for e in c: v = (e - mmin)/(mmax - mmin) + if v < 0.001: + v = 0.001 colors.append(f'rgb(0, 0.5, {v})') #c = (np.array(c) - min(c))/(max(c) - min(c)) From 663bd4f1e2efc29cc94507e973420fd68801bc37 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 24 Sep 2025 21:40:16 +0000 Subject: [PATCH 016/188] Plot colors --- pufferlib/ocean/plot/plot.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c index 6fad11f22..d64999f80 100644 --- a/pufferlib/ocean/plot/plot.c +++ b/pufferlib/ocean/plot/plot.c @@ -3,6 +3,11 @@ #include #include "raylib.h" +const Color PUFF_RED = (Color){187, 0, 0, 255}; +const Color PUFF_CYAN = (Color){0, 187, 187, 255}; +const Color PUFF_WHITE = (Color){241, 241, 241, 241}; +const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; + int main(void) { // Read CSV file FILE *fp = fopen("pufferlib/ocean/plot/data.csv", "r"); @@ -70,11 +75,11 @@ int main(void) { while (!WindowShouldClose()) { BeginDrawing(); - ClearBackground(RAYWHITE); + ClearBackground(PUFF_BACKGROUND); // Draw axes - DrawLine(margin, margin, margin, screenHeight - margin, BLACK); // Y-axis - DrawLine(margin, screenHeight - margin, screenWidth - margin, screenHeight - margin, BLACK); // X-axis + DrawLine(margin, margin, margin, screenHeight - margin, PUFF_WHITE); // Y-axis + DrawLine(margin, screenHeight - margin, screenWidth - margin, screenHeight - margin, PUFF_WHITE); // X-axis // Plot lines for (int j = 0; j < num_points - 1; j++) { @@ -82,7 +87,7 @@ int main(void) { float py1 = (screenHeight - margin) - (y[j] - min_y) / dy * (screenHeight - 2 * margin); float px2 = margin + (x[j + 1] - min_x) / dx * (screenWidth - 2 * margin); float py2 = (screenHeight - margin) - (y[j + 1] - min_y) / dy * (screenHeight - 2 * margin); - DrawLine(px1, py1, px2, py2, BLUE); + DrawLine(px1, py1, px2, py2, PUFF_CYAN); } EndDrawing(); From 
a6c84459379913b7ca341de76c5d322c59aa2c76 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 26 Sep 2025 17:54:50 +0000 Subject: [PATCH 017/188] Headless video save demo --- pufferlib/ocean/video/video.c | 140 ++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 pufferlib/ocean/video/video.c diff --git a/pufferlib/ocean/video/video.c b/pufferlib/ocean/video/video.c new file mode 100644 index 000000000..5d608a6b4 --- /dev/null +++ b/pufferlib/ocean/video/video.c @@ -0,0 +1,140 @@ +/* Quick rendering demo. You can use Xvfb to make it headless. + * It is faster than writing individual frames to a file. + * + * apt install ffmpeg Xvfb + * Xvfb :99 -screen 0 1280x720x24 & + * + * Separate terminal: + * bash scripts/build_ocean video fast + * export DISPLAY=:99 + * ./video + */ + +#include +#include "rlgl.h" +#include +#include +#include +#include +#include + +const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; + +typedef struct { + int pipefd[2]; + pid_t pid; +} VideoRecorder; + +bool OpenVideo(VideoRecorder *recorder) { + if (pipe(recorder->pipefd) == -1) { + TraceLog(LOG_ERROR, "Failed to create pipe"); + return false; + } + + recorder->pid = fork(); + if (recorder->pid == -1) { + TraceLog(LOG_ERROR, "Failed to fork"); + return false; + } + + if (recorder->pid == 0) { // Child process: run ffmpeg + close(recorder->pipefd[1]); + dup2(recorder->pipefd[0], STDIN_FILENO); + close(recorder->pipefd[0]); + execlp("ffmpeg", "ffmpeg", + "-y", + "-f", "rawvideo", + "-pix_fmt", "rgba", + "-s", TextFormat("%dx%d", GetScreenWidth(), GetScreenHeight()), + "-r", "60", + "-i", "-", + "-c:v", "libx264", + "-pix_fmt", "yuv420p", + "-preset", "fast", + "-crf", "18", + "-loglevel", "error", + "output.mp4", + NULL); + TraceLog(LOG_ERROR, "Failed to launch ffmpeg"); + return false; + } + + close(recorder->pipefd[0]); // Close read end in parent + return true; +} + +void WriteFrame(VideoRecorder *recorder) { + int width = GetScreenWidth(); + int 
height = GetScreenHeight(); + unsigned char *screen_data = rlReadScreenPixels(width, height); + write(recorder->pipefd[1], screen_data, width * height * 4 * sizeof(*screen_data)); + RL_FREE(screen_data); +} + +void CloseVideo(VideoRecorder *recorder) { + close(recorder->pipefd[1]); + wait(NULL); +} + +int main(void) { + const int screenWidth = 800; + const int screenHeight = 600; + const int maxFrames = 300; + float elapsedTime = 0; + float writeFPS = 0; + + InitWindow(screenWidth, screenHeight, "Headless video saving"); + SetTargetFPS(6000); // High FPS so we can time this test + + VideoRecorder recorder; + if (!OpenVideo(&recorder)) { + CloseWindow(); + return -1; + } + + // Sample program + Texture2D texture = LoadTexture("resources/shared/puffers.png"); + Rectangle rightRec = {0.0f, 0.0f, 128.0f, 128.0f}; + Rectangle leftRec = {0.0f, 128.0f, 128.0f, 128.0f}; + Vector2 pos = {(float)(screenWidth/2.0f), (float)(screenHeight/2.0f)}; + Vector2 vel= {3.0f, 3.0f}; + + int frame = 0; + double startTime = GetTime(); + while (!WindowShouldClose()) { + pos.x += vel.x; + pos.y += vel.y; + if (pos.x <= 0 || pos.x + 128 >= screenWidth) vel.x = -vel.x; + if (pos.y <= 0 || pos.y + 128 >= screenHeight) vel.y = -vel.y; + + // Render as normal; no changes required + BeginDrawing(); + ClearBackground(PUFF_BACKGROUND); + DrawTextureRec(texture, (vel.x >= 0) ? rightRec : leftRec, pos, WHITE); + DrawText("Headless video saving demo", 20, 20, 20, WHITE); + EndDrawing(); + + // Write a single frame + if (frame < maxFrames) { + WriteFrame(&recorder); + frame++; + } + + // Don't forget to close the file + if (frame >= maxFrames) { + double endTime = GetTime(); // End timing + elapsedTime = endTime - startTime; + writeFPS = (elapsedTime > 0) ? 
maxFrames / elapsedTime : 0; + CloseVideo(&recorder); + break; + } + } + + UnloadTexture(texture); + CloseWindow(); + + printf("Wrote %d frames in %.2f seconds (%.2f FPS)\n", + maxFrames, elapsedTime, writeFPS); + + return 0; +} From 6d97022157667e2117a692ed45b18844f7ec1087 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 29 Sep 2025 18:14:18 +0000 Subject: [PATCH 018/188] Temp garbo --- plottest.py | 673 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 579 insertions(+), 94 deletions(-) diff --git a/plottest.py b/plottest.py index 9423623a9..a857d9a82 100644 --- a/plottest.py +++ b/plottest.py @@ -1,7 +1,10 @@ from dash import Dash, html, dcc +from dash.dependencies import Input, Output import plotly.graph_objects as go import numpy as np import json +import glob +import os # Global styling variables FONT_FAMILY = 'Arial' @@ -40,6 +43,30 @@ color=FONT_COLOR ) +HYPERS = [ + 'train/learning_rate', + 'train/ent_coef', + 'train/gamma', + 'train/gae_lambda', + 'train/vtrace_rho_clip', + 'train/vtrace_c_clip', + 'train/clip_coef', + 'train/vf_clip_coef', + 'train/vf_coef', + 'train/max_grad_norm', + 'train/adam_beta1', + 'train/adam_beta2', + 'train/adam_eps', + 'train/prio_alpha', + 'train/prio_beta0', + 'train/bptt_horizon', + 'train/num_minibatches', + 'train/minibatch_size', + 'policy/hidden_size', + 'env/frameskip', + 'env/num_envs', +] + def rgba(hex, alpha): return f"rgba({int(hex[1:3], 16)}, {int(hex[3:5], 16)}, {int(hex[5:7], 16)}, {alpha})" @@ -101,12 +128,19 @@ def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0): #vals = [(c - mmin)/(mmax - mmin) for c in c] #vals = [max(0.01, v) for v in vals] - colors = [] - for e in c: - v = (e - mmin)/(mmax - mmin) - if v < 0.001: - v = 0.001 - colors.append(f'rgb(0, 0.5, {v})') + if isinstance(c, str): + colors = c + else: + colors = [] + for e in c: + if mmin != mmax: + v = (e - mmin)/(mmax - mmin) + else: + v = e + + if v < 0.001: + v = 0.001 + colors.append(f'rgb(0, 0.5, {v})') #c = 
(np.array(c) - min(c))/(max(c) - min(c)) fig.add_trace( @@ -150,20 +184,109 @@ def plot_group(fig, xx, yy, xlabel='Performance', legend='Trial', log_x=False, i ) ) -def pareto_points(steps, costs, scores): +def plot_quantiles(fig, x, y, s, xlabel='Performance', legend='Trial', log_x=False, i=0): + # Ensure inputs are numpy arrays + x = np.array(x) + y = np.array(y) + s = np.array(s) + + # Sort data by x for smooth plotting + sort_idx = np.argsort(x) + x = x[sort_idx] + y = y[sort_idx] + s = s[sort_idx] + + # Define quantile thresholds (in descending order for proper range filtering) + quantile_thresholds = [0.95, 0.9, 0.75, 0.5, 0.25, 0.0] + quantile_thresholds = [e*s.max() for e in quantile_thresholds] + colors = ['blue', 'green', 'orange', 'red'] + + # Plot center lines and scatter for each quantile range + for j, (q, color) in enumerate(zip(quantile_thresholds, colors)): + # Define the range for this quantile + if j == 0: + # Highest quantile: s >= q + mask = s >= q + else: + # Other quantiles: q <= s < previous_q + prev_q = quantile_thresholds[j - 1] + mask = (s >= q) & (s < prev_q) + + if np.sum(mask) <= 5: + continue # Skip + + fx = x[mask] + fy = y[mask] + fs = np.ones_like(fx) # More robust to bin scores into quantiles + + #px, py, ps = pareto_points(fx, fy, fs) + fx, fy, fs = pareto_points(fx, fy, fs, 0.1) + + fx = np.array(fx) + fy = np.array(fy) + fs = np.array(fs) + + px, py, y_lower, y_upper = loess_fit(fx, fs, fy, n_bins=10, frac=0.4) + + #x_q = x[mask] + #y_q = y[mask] + + # Compute moving average for center line + #y_mean = np.convolve(y_q, np.ones(window_size)/window_size, mode='valid') + # Adjust x_q to match the length of y_mean (trim edges due to convolution) + #trim = (window_size - 1) // 2 + #x_mean = x_q[trim:len(x_q)-trim] + + #if len(x_mean) <= 1: + # continue # Skip if not enough points after trimming + + # Plot center line + fig.add_trace( + go.Scatter( + x=px, + y=py, + mode='lines', + name=f'{legend} Q{q:.2f}', + line=dict( + 
color=color, + width=LINE_WIDTH + ) + ) + ) + + # Plot scatter for points in this quantile range + fig.add_trace( + go.Scatter( + x=fx, + y=fy, + mode='markers', + name=f'{legend} Q{q:.2f} Points', + marker=dict( + color=color, + size=5 + ), + showlegend=False # Hide scatter legend to avoid clutter + ) + ) + + # Update axes + fig.update_xaxes(title_text=xlabel) + if log_x: + fig.update_xaxes(type='log') + + return fig + +def pareto_points(steps, costs, scores, soft=0.0): pareto_steps = [] pareto_costs = [] pareto_scores = [] max_score = max(scores) for i in range(len(steps)): - if scores[i] < 0.25*max_score: - continue - - higher_score = [s for s in scores if s > scores[i]] - lower_steps = [s for s in steps if s < scores[i]] - lower_cost = [c for c in costs if c < costs[i]] - better = [scores[j] > scores[i] and - costs[j] < costs[i] and steps[j] < steps[i] + #if scores[i] < 0.25*max_score: + # continue + + better = [scores[j] >= scores[i] and + costs[j] < costs[i]*(1 - soft) and steps[j] < steps[i]*(1 - soft) for j in range(len(scores))] if not any(better): pareto_steps.append(steps[i]) @@ -214,46 +337,253 @@ def load_hyper_data(filename): all_perf = np.array(all_perf).reshape(3, -1) return all_hyper, all_perf + def load_sweep_data(path): - import glob - experiments = [] + data = {} + keys = None for fpath in glob.glob(path): with open(fpath, 'r') as f: exp = json.load(f) - data = {} - for kk, vv in exp.items(): - if kk == 'data': - for k, v in exp[kk][-1].items(): - data[k] = v + if not data: + for kk in exp.keys(): + if kk == 'data': + for k, v in exp[kk][-1].items(): + data[k] = [] + else: + data[kk] = [] + + discard = False + for kk in list(data.keys()): + if kk not in exp and kk not in exp['data'][-1]: + discard = True + break + + if discard: + continue + + for kk in list(data.keys()): + if kk in exp: + data[kk].append(exp[kk]) else: - data[kk] = vv + data[kk].append(exp['data'][-1][kk]) + + return data + +def cached_sweep_load(path): + cache_file = 
os.path.join(path, 'cache.json') + if not os.path.exists(cache_file): + data = load_sweep_data(os.path.join(path, '*.json')) + with open(cache_file, 'w') as f: + json.dump(data, f) - experiments.append(data) + with open(cache_file, 'r') as f: + data = json.load(f) - return experiments + return data -def layout(): - #fig1 = figure(title='Hyperparameter Ablation', xlabel='Learning Rate', legend='Ablate', xaxis_type='log') - #all_hyper, all_perf = load_hyper_data('puffer_pong_learning_rate.npz') - #plot_group(fig1, all_hyper, all_perf, legend='Pong') - #all_hyper, all_perf = load_hyper_data('puffer_breakout_learning_rate.npz') - #plot_group(fig1, all_hyper, all_perf, legend='Breakout', i=1) + ''' + # Step 1: Check if cache exists; if not, create it using load_sweep_data + if not os.path.exists(cache_file): + experiments = load_sweep_data(os.path.join(path, '*.json')) + # Create cache as list of [filename, data] pairs + cache_data = [ + [os.path.basename(fpath), exp] + for fpath, exp in zip(glob.glob(os.path.join(path, '*.json')), experiments) + if not fpath.endswith('cache.json') # Exclude cache file itself + ] + # Write cache + with open(cache_file, 'w') as f: + json.dump(cache_data, f) + + # Step 2: Load existing cache + with open(cache_file, 'r') as f: + cache_data = json.load(f) + + # Convert cache to dict: {filename: data} + cache_dict = {item[0]: item[1] for item in cache_data} + + # Get current files in directory (excluding cache.json) + current_files = set( + os.path.basename(f) for f in glob.glob(os.path.join(path, '*.json')) + if not f.endswith('cache.json') + ) + cached_files = set(cache_dict.keys()) + + # Step 3: Check for new files not in cache + new_files = current_files - cached_files + if new_files: + # Load only new files using a modified load_sweep_data + new_experiments = [] + new_file_paths = [os.path.join(path, fname) for fname in new_files] + for fpath in new_file_paths: + with open(fpath, 'r') as f: + exp = json.load(f) + data = {} + for kk, vv 
in exp.items(): + if kk == 'data': + for k, v in exp[kk][-1].items(): + data[k] = v + else: + data[kk] = vv + new_experiments.append([os.path.basename(fpath), data]) + + # Update cache with new experiments + cache_data.extend(new_experiments) + # Write updated cache + with open(cache_file, 'w') as f: + json.dump(cache_data, f) + + return [e[1] for e in cache_data] + return cache_data + + # Rebuild cache_dict to include any new files + cache_dict = {item[0]: item[1] for item in cache_data} + + # Return cached data as a dictionary + return cache_dict + ''' - #fig2 = figure(title='Seed Sensitivity', xlabel='Uptime', legend='Ablate') - #all_uptime, all_perf = load_seed_data('puffer_pong_seeds.npz') - #plot_group(fig2, all_uptime, all_perf, legend='Pong') - #all_uptime, all_perf = load_seed_data('puffer_breakout_seeds.npz') - #plot_group(fig2, all_uptime, all_perf, legend='Breakout', i=1) - #all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') - #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) - env_name = 'pong' +from statsmodels.nonparametric.smoothers_lowess import lowess +def compute_bin_stats(x, s, y, n_bins=20, overlap=0.5, score_threshold=0.95): + """ + Bin data, select high-performing points, and compute weighted stats. 
+ x: steps (log-scaled compute/samples), s: scores, y: hyperparameter values + """ + # Log-scale x to handle compute/samples range + x_log = np.log10(x) + x_min, x_max = x_log.min(), x_log.max() + bin_width = (x_max - x_min) / (n_bins * (1 - overlap)) + bin_centers = np.linspace(x_min, x_max, n_bins) + + y_weighted = [] + y_lower = [] + y_upper = [] + + for center in bin_centers: + # Define bin boundaries with overlap + bin_start = center - bin_width / 2 + bin_end = center + bin_width / 2 + mask = (x_log >= bin_start) & (x_log <= bin_end) + + # Select high-performing points (scores within 95% of max in bin) + bin_s = s[mask] + if len(bin_s) == 0: + y_weighted.append(np.nan) + y_lower.append(np.nan) + y_upper.append(np.nan) + continue + s_max = bin_s.max() + high_perf_mask = bin_s >= score_threshold * s_max + + # Compute weighted mean and quantiles for y + bin_y = y[mask][high_perf_mask] + bin_s = bin_s[high_perf_mask] + if len(bin_y) == 0: + y_weighted.append(np.nan) + y_lower.append(np.nan) + y_upper.append(np.nan) + continue + weights = bin_s / bin_s.sum() # Normalize scores as weights + y_mean = np.average(bin_y, weights=weights) + y_quantiles = np.percentile(bin_y, [25, 75]) # IQR for stability range + + y_weighted.append(y_mean) + y_lower.append(y_quantiles[0]) + y_upper.append(y_quantiles[1]) + + return bin_centers, np.array(y_weighted), np.array(y_lower), np.array(y_upper) + +def loess_fit(x, s, y, n_bins=20, frac=0.4): + """ + Perform LOESS fit on binned data for smoothed curve and ribbons. 
+ """ + # Compute bin statistics + bin_centers, y_weighted, y_lower, y_upper = compute_bin_stats(x, s, y, n_bins=n_bins) + + # Remove NaNs for LOESS + valid_mask = ~np.isnan(y_weighted) + bin_centers = bin_centers[valid_mask] + y_weighted = y_weighted[valid_mask] + y_lower = y_lower[valid_mask] + y_upper = y_upper[valid_mask] + + # Apply LOESS to weighted mean, lower, and upper bounds + smoothed_y = lowess(y_weighted, bin_centers, frac=frac, return_sorted=True) + smoothed_lower = lowess(y_lower, bin_centers, frac=frac, return_sorted=True) + smoothed_upper = lowess(y_upper, bin_centers, frac=frac, return_sorted=True) + + # Convert back to original x scale + x_smooth = 10 ** smoothed_y[:, 0] # Undo log-scale + y_smooth = smoothed_y[:, 1] + y_smooth_lower = smoothed_lower[:, 1] + y_smooth_upper = smoothed_upper[:, 1] + + return x_smooth, y_smooth, y_smooth_lower, y_smooth_upper + +#fig1 = figure(title='Hyperparameter Ablation', xlabel='Learning Rate', legend='Ablate', xaxis_type='log') +#all_hyper, all_perf = load_hyper_data('puffer_pong_learning_rate.npz') +#plot_group(fig1, all_hyper, all_perf, legend='Pong') +#all_hyper, all_perf = load_hyper_data('puffer_breakout_learning_rate.npz') +#plot_group(fig1, all_hyper, all_perf, legend='Breakout', i=1) + +#fig2 = figure(title='Seed Sensitivity', xlabel='Uptime', legend='Ablate') +#all_uptime, all_perf = load_seed_data('puffer_pong_seeds.npz') +#plot_group(fig2, all_uptime, all_perf, legend='Pong') +#all_uptime, all_perf = load_seed_data('puffer_breakout_seeds.npz') +#plot_group(fig2, all_uptime, all_perf, legend='Breakout', i=1) +#all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') +#plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) + +env_name = 'breakout' +EXPERIMENTS = cached_sweep_load(f'experiments/logs/puffer_{env_name}') - experiments = load_sweep_data(f'experiments/logs/puffer_{env_name}/*.json') - steps = [e['agent_steps'] for e in experiments] - costs = [e['cost'] for e in 
experiments] - scores = [e['environment/score'] for e in experiments] +# Initialize Dash app +app = Dash() +app.layout = html.Div([ + html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), + #dcc.Graph(figure=step_cost), + #dcc.Graph(figure=step_score), + #dcc.Graph(figure=cost_score), + html.Br(), + dcc.Dropdown( + id="pareto-dropdown", + options=[ + {"label": 'front', "value": 'front'}, + {"label": 'cost', "value": 'cost'}, + {"label": 'score', "value": 'score'}, + ], + value="front", + style={"width": "50%"} + ), + dcc.Graph(id='pareto'), + dcc.Slider( + id='hyper-agg-slider', + min=0.0, + max=1.0, + step=0.05, + value=0.95, + marks={i: str(0.05*i) for i in range(0, 21)} + ), + dcc.Graph(id='hyper-agg'), + dcc.Dropdown( + id="hyper-dropdown", + options=[{"label": key, "value": key} for key in HYPERS], + value="train/learning_rate", + style={"width": "50%"} + ), + dcc.Graph(id='hyper') +]) + +@app.callback( + Output("pareto", "figure"), + Input("pareto-dropdown", "value") +) +def update_pareto_plot(key): + steps = EXPERIMENTS['agent_steps'] + costs = EXPERIMENTS['cost'] + scores = EXPERIMENTS['environment/score'] # Filter outliers ''' @@ -265,22 +595,177 @@ def layout(): ''' # Adjust steps - has_skip = ['env/frameskip' in e for e in experiments] - if any(has_skip): - experiments = [e for e, s in zip(experiments, has_skip) if s] - steps = [e['env/frameskip']*e['agent_steps'] for e, s - in zip(experiments, has_skip) if s] - costs = [e['cost'] for e, s in zip(experiments, has_skip) if s] - scores = [e['environment/score'] for e, s in zip(experiments, has_skip) if s] + if 'env/frameskip' in EXPERIMENTS: + skip = EXPERIMENTS['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] # Filter by score + ''' max_score = max(scores) idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] filtered_steps = [steps[i] for i in idxs] filtered_costs = [costs[i] for i in idxs] filtered_scores = [scores[i] for i in idxs] + ''' + + #filtered_steps, 
filtered_costs, filtered_scores = pareto_points(steps, costs, scores) # Header plot + if key == 'front': + f = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + plot_quantiles(f, steps, costs, scores, + xlabel='Steps', legend='Trial', log_x=False, i=0) + #scatter(f, filtered_steps, filtered_costs, filtered_scores, legend=env_name) + elif key == 'score': + f = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') + scatter(f, steps, scores, costs, legend=env_name) + elif key == 'cost': + f = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') + scatter(f, costs, scores, steps, legend=env_name) + + return f + + figs = [] + + f = figure(title=hyper, xlabel='Steps', ylabel='Hyper', legend='Ablate') + #idxs = [i for i, e in enumerate(experiments) if hyper in e] + y = [EXPERIMENTS[i][hyper] for i in idxs] + s = [scores[i] for i in idxs] + #ss = [np.log(steps[i]) for i in idxs] + x = [steps[i] for i in idxs] + #c = [costs[i] for i in idxs] + + scatter(f, x, y, s, legend=env_name) + + x = np.array(x) + s = np.array(s) + y = np.array(y) + x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) + s = np.ones_like(x_smooth) + scatter(f, x_smooth, y_smooth, 'red', legend=env_name) + + #scatter(f, v, s, ss, legend=env_name) + #scatter(f, x, y, s, legend=env_name) + #figs.append(f) + + #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) + #plot_lines(fig3, [pareto_steps], [pareto_costs]) + return f + + + df = data_options[selected_dataset] + fig = px.scatter(df, x="x", y="y", title=f"Scatter Plot for {selected_dataset}") + return fig + + +@app.callback( + Output("hyper", "figure"), + Input("hyper-dropdown", "value") +) +def update_hyper_plot(hyper): + steps = EXPERIMENTS['agent_steps'] + costs = EXPERIMENTS['cost'] + scores = EXPERIMENTS['environment/score'] + + # Filter outliers + ''' + idxs = [i for i, s in enumerate(steps) if s < 1e6] + experiments = [experiments[i] for 
i in idxs] + steps = [steps[i] for i in idxs] + costs = [costs[i] for i in idxs] + scores = [scores[i] for i in idxs] + ''' + + # Adjust steps + if 'env/frameskip' in EXPERIMENTS: + skip = EXPERIMENTS['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] + + # Filter by score + max_score = max(scores) + idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] + filtered_steps = [steps[i] for i in idxs] + filtered_costs = [costs[i] for i in idxs] + filtered_scores = [scores[i] for i in idxs] + + # Header plot + ''' + step_cost = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + scatter(step_cost, filtered_steps, filtered_costs, filtered_scores, legend=env_name) + + step_score = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') + scatter(step_score, steps, scores, costs, legend=env_name) + + cost_score = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') + scatter(cost_score, costs, scores, steps, legend=env_name) + ''' + + figs = [] + + f = figure(title=hyper, xlabel='Steps', ylabel='Hyper', legend='Ablate') + #idxs = [i for i, e in enumerate(experiments) if hyper in e] + y = [EXPERIMENTS[hyper][i] for i in idxs] + s = [scores[i] for i in idxs] + #ss = [np.log(steps[i]) for i in idxs] + x = [steps[i] for i in idxs] + #c = [costs[i] for i in idxs] + + scatter(f, x, y, s, legend=env_name) + + x = np.array(x) + s = np.array(s) + y = np.array(y) + x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) + s = np.ones_like(x_smooth) + + plot_lines(f, [x_smooth], [y_smooth]) + #scatter(f, x_smooth, y_smooth, 'red', legend=env_name) + + #scatter(f, v, s, ss, legend=env_name) + #scatter(f, x, y, s, legend=env_name) + #figs.append(f) + + #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) + #plot_lines(fig3, [pareto_steps], [pareto_costs]) + return f + + + df = data_options[selected_dataset] + fig = px.scatter(df, x="x", y="y", title=f"Scatter Plot 
for {selected_dataset}") + return fig + +@app.callback( + Output("hyper-agg", "figure"), + Input("hyper-agg-slider", "value") +) +def update_hyper_agg_plot(thresh): + steps = EXPERIMENTS['agent_steps'] + costs = EXPERIMENTS['cost'] + scores = EXPERIMENTS['environment/score'] + + # Filter outliers + ''' + idxs = [i for i, s in enumerate(steps) if s < 1e6] + experiments = [experiments[i] for i in idxs] + steps = [steps[i] for i in idxs] + costs = [costs[i] for i in idxs] + scores = [scores[i] for i in idxs] + ''' + + # Adjust steps + if 'env/frameskip' in EXPERIMENTS: + skip = EXPERIMENTS['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] + + # Filter by score + max_score = max(scores) + idxs = [i for i, s in enumerate(scores) if s > thresh*max_score] + filtered_steps = [steps[i] for i in idxs] + filtered_costs = [costs[i] for i in idxs] + filtered_scores = [scores[i] for i in idxs] + + # Header plot + ''' step_cost = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') scatter(step_cost, filtered_steps, filtered_costs, filtered_scores, legend=env_name) @@ -289,59 +774,59 @@ def layout(): cost_score = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') scatter(cost_score, costs, scores, steps, legend=env_name) + ''' figs = [] - hypers = [ - #'train/learning_rate', - #'train/ent_coef', - #'train/gamma', - #'train/gae_lambda', - #'train/vtrace_rho_clip', - #'train/vtrace_c_clip', - #'train/clip_coef', - #'train/vf_clip_coef', - #'train/vf_coef', - #'train/max_grad_norm', - #'train/adam_beta1', - #'train/adam_beta2', - #'train/adam_eps', - #'train/prio_alpha', - #'train/prio_beta0', - #'train/bptt_horizon', - #'train/num_minibatches', - #'train/minibatch_size', - #'policy/hidden_size', - #'env/frameskip', - #'env/num_envs', - ] - for hyper in hypers: - f = figure(title=hyper, xlabel=hyper, ylabel='Score', legend='Ablate') - idxs = [i for i, e in enumerate(experiments) if hyper in e] - v = [experiments[i][hyper] for i in idxs] 
+ + f = figure(title='bar', xlabel='Steps', ylabel='Hyper', legend='Ablate') + f.update_yaxes(type='log') + for hyper in HYPERS: + #idxs = [i for i, e in enumerate(experiments) if hyper in e] + y = [EXPERIMENTS[hyper][i] for i in idxs] s = [scores[i] for i in idxs] - ss = [np.log(steps[i]) for i in idxs] - c = [costs[i] for i in idxs] - scatter(f, v, s, ss, legend=env_name) - figs.append(f) + #ss = [np.log(steps[i]) for i in idxs] + x = [steps[i] for i in idxs] + #c = [costs[i] for i in idxs] + + + ymin = min(y) + ymax = max(y) + f.add_trace( + go.Bar( + x=[0], + y=[ymax - ymin], + base=ymin, + name=hyper + ) + ) + + #scatter(f, x, y, s, legend=env_name) + + x = np.array(x) + s = np.array(s) + y = np.array(y) + x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) + s = np.ones_like(x_smooth) + + #plot_lines(f, [x_smooth], [y_smooth]) + #scatter(f, x_smooth, y_smooth, 'red', legend=env_name) + + #scatter(f, v, s, ss, legend=env_name) + #scatter(f, x, y, s, legend=env_name) + #figs.append(f) #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) #plot_lines(fig3, [pareto_steps], [pareto_costs]) - layout = html.Div([ - html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), - dcc.Graph(figure=step_cost), - dcc.Graph(figure=step_score), - dcc.Graph(figure=cost_score), - html.Br(), - *[dcc.Graph(figure=f) for f in figs] - ]) - return layout + return f + df = data_options[selected_dataset] + fig = px.scatter(df, x="x", y="y", title=f"Scatter Plot for {selected_dataset}") + return fig + -# Initialize Dash app -app = Dash() # Set layout with static graph -app.layout = layout +#app.layout = layout if __name__ == '__main__': - app.run(host='0.0.0.0', port=8090) + app.run(host='0.0.0.0', port=8080) From 7cb168cd22d929982f06dab3a4e2cbe46ba03c11 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 30 Sep 2025 01:32:25 +0000 Subject: [PATCH 019/188] Still a mess but better --- plottest.py | 495 
++++++++++++++++++++++++++++------------------------ 1 file changed, 264 insertions(+), 231 deletions(-) diff --git a/plottest.py b/plottest.py index a857d9a82..bf4a689db 100644 --- a/plottest.py +++ b/plottest.py @@ -67,9 +67,29 @@ 'env/num_envs', ] +ALL_KEYS = [ + 'agent_steps', + 'cost', + 'environment/score' +] + HYPERS + def rgba(hex, alpha): return f"rgba({int(hex[1:3], 16)}, {int(hex[3:5], 16)}, {int(hex[5:7], 16)}, {alpha})" +def band(experiments, key, qmin=0.0, qmax=1.0): + mmax = np.array(experiments[key]).max() + top = qmax * mmax + bot = qmin * mmax + filtered = {k: [] for k in experiments} + for i, score in enumerate(experiments[key]): + if score < bot or score > top: + continue + + for k, v in experiments.items(): + filtered[k].append(v[i]) + + return filtered + def mean_conf(xx, yy): x_min = min([min(x) for x in xx]) x_max = max([max(x) for x in xx]) @@ -107,14 +127,14 @@ def figure(title='The Puffer Frontier Project', fig.update_yaxes(showgrid=False) return fig -def plot_lines(fig, xx, yy): +def plot_lines(fig, xx, yy, name='Trial'): for i, (x, y) in enumerate(zip(xx, yy)): fig.add_trace( go.Scatter( x=x, y=y, mode='lines', - name=f'Trial {i+1}', + name=name, line=dict( color=LINE_COLORS[i % len(LINE_COLORS)], width=LINE_WIDTH @@ -122,7 +142,7 @@ def plot_lines(fig, xx, yy): ) ) -def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0): +def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0, showlegend=True): mmin = min(c) mmax = max(c) #vals = [(c - mmin)/(mmax - mmin) for c in c] @@ -149,6 +169,7 @@ def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0): y=y, mode='markers', name=legend, + showlegend=showlegend, marker=dict( color=colors, size=10 @@ -184,43 +205,26 @@ def plot_group(fig, xx, yy, xlabel='Performance', legend='Trial', log_x=False, i ) ) -def plot_quantiles(fig, x, y, s, xlabel='Performance', legend='Trial', log_x=False, i=0): - # Ensure inputs are numpy arrays - x = np.array(x) - y = np.array(y) - s = np.array(s) +def 
plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial', log_x=False, i=0): + # Define quantile thresholds (in descending order for proper range filtering) + #quantile_thresholds = [1.0, 0.95, 0.9, 0.75, 0.5, 0.25, 0.05] + quantile_thresholds = [1.0, thresh] + colors = [['indigo', 'blue', 'green', 'yellow', 'orange', 'red'][i]] - # Sort data by x for smooth plotting - sort_idx = np.argsort(x) - x = x[sort_idx] - y = y[sort_idx] - s = s[sort_idx] + for i in range(len(quantile_thresholds) - 1): + qmin = quantile_thresholds[i + 1] + qmax = quantile_thresholds[i] - # Define quantile thresholds (in descending order for proper range filtering) - quantile_thresholds = [0.95, 0.9, 0.75, 0.5, 0.25, 0.0] - quantile_thresholds = [e*s.max() for e in quantile_thresholds] - colors = ['blue', 'green', 'orange', 'red'] - - # Plot center lines and scatter for each quantile range - for j, (q, color) in enumerate(zip(quantile_thresholds, colors)): - # Define the range for this quantile - if j == 0: - # Highest quantile: s >= q - mask = s >= q - else: - # Other quantiles: q <= s < previous_q - prev_q = quantile_thresholds[j - 1] - mask = (s >= q) & (s < prev_q) - - if np.sum(mask) <= 5: + filtered = band(experiments, 'environment/score', qmin, qmax) + + if len(filtered['environment/score']) < 5: continue # Skip - fx = x[mask] - fy = y[mask] - fs = np.ones_like(fx) # More robust to bin scores into quantiles + x = filtered['agent_steps'] + y = filtered['cost'] + s = np.ones_like(x) - #px, py, ps = pareto_points(fx, fy, fs) - fx, fy, fs = pareto_points(fx, fy, fs, 0.1) + fx, fy, fs = pareto_points(x, y, s, 0.1) fx = np.array(fx) fy = np.array(fy) @@ -246,9 +250,9 @@ def plot_quantiles(fig, x, y, s, xlabel='Performance', legend='Trial', log_x=Fal x=px, y=py, mode='lines', - name=f'{legend} Q{q:.2f}', + name=f'{legend} Q{qmin:.2f}', line=dict( - color=color, + color=colors[i], width=LINE_WIDTH ) ) @@ -260,9 +264,9 @@ def plot_quantiles(fig, x, y, s, 
xlabel='Performance', legend='Trial', log_x=Fal x=fx, y=fy, mode='markers', - name=f'{legend} Q{q:.2f} Points', + name=f'{legend} Q{qmin:.2f} Points', marker=dict( - color=color, + color=colors[i], size=5 ), showlegend=False # Hide scatter legend to avoid clutter @@ -536,8 +540,11 @@ def loess_fit(x, s, y, n_bins=20, frac=0.4): #all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') #plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) -env_name = 'breakout' -EXPERIMENTS = cached_sweep_load(f'experiments/logs/puffer_{env_name}') +env_names = ['breakout', 'pong'] +EXPERIMENTS = { + name: cached_sweep_load(f'experiments/logs/puffer_{name}') + for name in env_names +} # Initialize Dash app app = Dash() @@ -547,17 +554,24 @@ def loess_fit(x, s, y, n_bins=20, frac=0.4): #dcc.Graph(figure=step_score), #dcc.Graph(figure=cost_score), html.Br(), - dcc.Dropdown( - id="pareto-dropdown", - options=[ - {"label": 'front', "value": 'front'}, - {"label": 'cost', "value": 'cost'}, - {"label": 'score', "value": 'score'}, - ], - value="front", - style={"width": "50%"} + dcc.Slider( + id='pareto-slider', + min=0.0, + max=1.0, + step=0.05, + value=0.95, + marks={i: str(0.05*i) for i in range(0, 21)} ), dcc.Graph(id='pareto'), + dcc.Slider( + id='hyper-stable-slider', + min=0.0, + max=1.0, + step=0.05, + value=0.95, + marks={i: str(0.05*i) for i in range(0, 21)} + ), + dcc.Graph(id='hyper-stable'), dcc.Slider( id='hyper-agg-slider', min=0.0, @@ -568,117 +582,186 @@ def loess_fit(x, s, y, n_bins=20, frac=0.4): ), dcc.Graph(id='hyper-agg'), dcc.Dropdown( - id="hyper-dropdown", - options=[{"label": key, "value": key} for key in HYPERS], - value="train/learning_rate", + id="scatter-dropdown-x", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="agent_steps", + style={"width": "50%"} + ), + dcc.Dropdown( + id="scatter-dropdown-y", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="environment/score", style={"width": "50%"} ), - 
dcc.Graph(id='hyper') + dcc.Graph(id='scatter') ]) @app.callback( Output("pareto", "figure"), - Input("pareto-dropdown", "value") + Input("pareto-slider", "value") ) -def update_pareto_plot(key): - steps = EXPERIMENTS['agent_steps'] - costs = EXPERIMENTS['cost'] - scores = EXPERIMENTS['environment/score'] +def update_pareto_plot(thresh): + f = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + + for i, env in enumerate(EXPERIMENTS): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] + + # Adjust steps + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] + + # Header plot + #if key == 'front': + plot_quantiles(f, EXPERIMENTS[env], thresh, xlabel='Steps', legend=env, log_x=False, i=i) + ''' + elif key == 'score': + f = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') + scatter(f, steps, scores, costs, legend=env_name) + elif key == 'cost': + f = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') + scatter(f, costs, scores, steps, legend=env_name) + ''' - # Filter outliers - ''' - idxs = [i for i, s in enumerate(steps) if s < 1e6] - experiments = [experiments[i] for i in idxs] - steps = [steps[i] for i in idxs] - costs = [costs[i] for i in idxs] - scores = [scores[i] for i in idxs] - ''' + return f + +@app.callback( + Output("scatter", "figure"), + Input("scatter-dropdown-x", "value"), + Input("scatter-dropdown-y", "value") +) +def update_scatter(xkey, ykey): + for i, env in enumerate(EXPERIMENTS): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] # Adjust steps - if 'env/frameskip' in EXPERIMENTS: - skip = EXPERIMENTS['env/frameskip'] + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] steps = [n*m for n, m in zip(steps, skip)] - # Filter by score + f 
= figure(title='Scatter', xlabel='Steps', ylabel='Hyper', legend='Ablate') + f.update_yaxes(type='log') + + x = EXPERIMENTS[env][xkey] + y = EXPERIMENTS[env][ykey] + c = scores + scatter(f, x, y, c, showlegend=False) + ''' + # Filter by score max_score = max(scores) idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] filtered_steps = [steps[i] for i in idxs] filtered_costs = [costs[i] for i in idxs] filtered_scores = [scores[i] for i in idxs] - ''' - #filtered_steps, filtered_costs, filtered_scores = pareto_points(steps, costs, scores) - - # Header plot - if key == 'front': - f = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - plot_quantiles(f, steps, costs, scores, - xlabel='Steps', legend='Trial', log_x=False, i=0) - #scatter(f, filtered_steps, filtered_costs, filtered_scores, legend=env_name) - elif key == 'score': - f = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') - scatter(f, steps, scores, costs, legend=env_name) - elif key == 'cost': - f = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') - scatter(f, costs, scores, steps, legend=env_name) + f = figure(title='Hyper Stability', xlabel='Steps', ylabel='Hyper', legend='Ablate') + f.update_yaxes(type='log') + + for j, hyper in enumerate(HYPERS): + c = LINE_COLORS[j % len(LINE_COLORS)] + s = [scores[i] for i in idxs] + x = [steps[i] for i in idxs] + y = [EXPERIMENTS[env][hyper][i] for i in idxs] + + x, y, s = pareto_points(x, y, s, 0.1) + scatter(f, x, y, c, showlegend=False) + + x = np.array(x) + s = np.array(s) + y = np.array(y) + x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) + s = np.ones_like(x_smooth) + + f.add_trace( + go.Scatter( + x=x_smooth, + y=y_smooth, + mode='lines', + name=hyper, + line=dict( + color=c, + width=LINE_WIDTH + ) + ) + ) + ''' return f - figs = [] - f = figure(title=hyper, xlabel='Steps', ylabel='Hyper', legend='Ablate') - #idxs = [i for i, e in enumerate(experiments) if 
hyper in e] - y = [EXPERIMENTS[i][hyper] for i in idxs] - s = [scores[i] for i in idxs] - #ss = [np.log(steps[i]) for i in idxs] - x = [steps[i] for i in idxs] - #c = [costs[i] for i in idxs] +@app.callback( + Output("hyper-stable", "figure"), + Input("hyper-stable-slider", "value") +) +def update_hyper_stable(thresh): + for i, env in enumerate(EXPERIMENTS): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] - scatter(f, x, y, s, legend=env_name) + # Adjust steps + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] - x = np.array(x) - s = np.array(s) - y = np.array(y) - x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) - s = np.ones_like(x_smooth) - scatter(f, x_smooth, y_smooth, 'red', legend=env_name) + # Filter by score + max_score = max(scores) + idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] + filtered_steps = [steps[i] for i in idxs] + filtered_costs = [costs[i] for i in idxs] + filtered_scores = [scores[i] for i in idxs] - #scatter(f, v, s, ss, legend=env_name) - #scatter(f, x, y, s, legend=env_name) - #figs.append(f) + f = figure(title='Hyper Stability', xlabel='Steps', ylabel='Hyper', legend='Ablate') + f.update_yaxes(type='log') - #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) - #plot_lines(fig3, [pareto_steps], [pareto_costs]) - return f + for j, hyper in enumerate(HYPERS): + c = LINE_COLORS[j % len(LINE_COLORS)] + s = [scores[i] for i in idxs] + x = [steps[i] for i in idxs] + y = [EXPERIMENTS[env][hyper][i] for i in idxs] + x, y, s = pareto_points(x, y, s, 0.1) + scatter(f, x, y, c, showlegend=False) - df = data_options[selected_dataset] - fig = px.scatter(df, x="x", y="y", title=f"Scatter Plot for {selected_dataset}") - return fig + x = np.array(x) + s = np.array(s) + y = np.array(y) + x_smooth, y_smooth, y_lower, y_upper = 
loess_fit(x, s, y, n_bins=20, frac=0.4) + s = np.ones_like(x_smooth) + f.add_trace( + go.Scatter( + x=x_smooth, + y=y_smooth, + mode='lines', + name=hyper, + line=dict( + color=c, + width=LINE_WIDTH + ) + ) + ) + + return f @app.callback( Output("hyper", "figure"), Input("hyper-dropdown", "value") ) def update_hyper_plot(hyper): - steps = EXPERIMENTS['agent_steps'] - costs = EXPERIMENTS['cost'] - scores = EXPERIMENTS['environment/score'] - - # Filter outliers - ''' - idxs = [i for i, s in enumerate(steps) if s < 1e6] - experiments = [experiments[i] for i in idxs] - steps = [steps[i] for i in idxs] - costs = [costs[i] for i in idxs] - scores = [scores[i] for i in idxs] - ''' + for i, env in enumerate(EXPERIMENTS): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] # Adjust steps - if 'env/frameskip' in EXPERIMENTS: - skip = EXPERIMENTS['env/frameskip'] + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] steps = [n*m for n, m in zip(steps, skip)] # Filter by score @@ -688,28 +771,12 @@ def update_hyper_plot(hyper): filtered_costs = [costs[i] for i in idxs] filtered_scores = [scores[i] for i in idxs] - # Header plot - ''' - step_cost = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - scatter(step_cost, filtered_steps, filtered_costs, filtered_scores, legend=env_name) - - step_score = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') - scatter(step_score, steps, scores, costs, legend=env_name) - - cost_score = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') - scatter(cost_score, costs, scores, steps, legend=env_name) - ''' - - figs = [] - f = figure(title=hyper, xlabel='Steps', ylabel='Hyper', legend='Ablate') - #idxs = [i for i, e in enumerate(experiments) if hyper in e] - y = [EXPERIMENTS[hyper][i] for i in idxs] + y = [EXPERIMENTS[env][hyper][i] for i in idxs] s = [scores[i] for i in idxs] - #ss 
= [np.log(steps[i]) for i in idxs] x = [steps[i] for i in idxs] - #c = [costs[i] for i in idxs] + x, y, s = pareto_points(x, y, s, 0.1) scatter(f, x, y, s, legend=env_name) x = np.array(x) @@ -719,113 +786,79 @@ def update_hyper_plot(hyper): s = np.ones_like(x_smooth) plot_lines(f, [x_smooth], [y_smooth]) - #scatter(f, x_smooth, y_smooth, 'red', legend=env_name) - - #scatter(f, v, s, ss, legend=env_name) - #scatter(f, x, y, s, legend=env_name) - #figs.append(f) - - #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) - #plot_lines(fig3, [pareto_steps], [pareto_costs]) return f - - df = data_options[selected_dataset] - fig = px.scatter(df, x="x", y="y", title=f"Scatter Plot for {selected_dataset}") - return fig +from plotly import graph_objects as go +from dash import Output, Input @app.callback( Output("hyper-agg", "figure"), Input("hyper-agg-slider", "value") ) def update_hyper_agg_plot(thresh): - steps = EXPERIMENTS['agent_steps'] - costs = EXPERIMENTS['cost'] - scores = EXPERIMENTS['environment/score'] - - # Filter outliers - ''' - idxs = [i for i, s in enumerate(steps) if s < 1e6] - experiments = [experiments[i] for i in idxs] - steps = [steps[i] for i in idxs] - costs = [costs[i] for i in idxs] - scores = [scores[i] for i in idxs] - ''' + for i, env in enumerate(EXPERIMENTS): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] # Adjust steps - if 'env/frameskip' in EXPERIMENTS: - skip = EXPERIMENTS['env/frameskip'] + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] steps = [n*m for n, m in zip(steps, skip)] + # Initialize figure + f = go.Figure() + f.update_layout( + title=dict(text='Bar', font=TITLE_FONT), + xaxis=dict(title=dict(text='Hyper', font=AXIS_FONT), tickfont=TICK_FONT), + yaxis=dict(title=dict(text='Value', font=AXIS_FONT), tickfont=TICK_FONT), + showlegend=True, + legend=dict(font=LEGEND_FONT), + 
plot_bgcolor=PLOT_BG_COLOR, + paper_bgcolor=PAPER_BG_COLOR, + width=1280, + height=720, + autosize=False, + yaxis_type='log', + barmode='overlay', # Overlay bars instead of stacking + xaxis_tickangle=45 # Rotate x-axis labels for readability + ) + f.update_xaxes(showgrid=False) + f.update_yaxes(showgrid=False) + # Filter by score max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > thresh*max_score] - filtered_steps = [steps[i] for i in idxs] - filtered_costs = [costs[i] for i in idxs] - filtered_scores = [scores[i] for i in idxs] - - # Header plot - ''' - step_cost = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') - scatter(step_cost, filtered_steps, filtered_costs, filtered_scores, legend=env_name) - - step_score = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') - scatter(step_score, steps, scores, costs, legend=env_name) - - cost_score = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') - scatter(cost_score, costs, scores, steps, legend=env_name) - ''' - - figs = [] - - f = figure(title='bar', xlabel='Steps', ylabel='Hyper', legend='Ablate') - f.update_yaxes(type='log') - for hyper in HYPERS: - #idxs = [i for i, e in enumerate(experiments) if hyper in e] - y = [EXPERIMENTS[hyper][i] for i in idxs] - s = [scores[i] for i in idxs] - #ss = [np.log(steps[i]) for i in idxs] - x = [steps[i] for i in idxs] - #c = [costs[i] for i in idxs] - + thresholds = [0.95, 0.9, 0.75, 0.5, 0.25, 0.05][::-1] # Reversed for 0.95 on top + colors = ['indigo', 'blue', 'green', 'yellow', 'orange', 'red'][::-1] + widths = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3][::-1] # Narrower bars for higher thresholds + + for j, t in enumerate(thresholds): + idxs = [i for i, s in enumerate(scores) if s > t*max_score] + + if idxs: # Only add traces if there are valid indices + for k, hyper in enumerate(HYPERS): + try: + y = [EXPERIMENTS[env][hyper][i] for i in idxs] + except: + breakpoint() + if y: # Ensure y is not empty + 
ymin = min(y) + ymax = max(y) + f.add_trace( + go.Bar( + x=[hyper], # Hyperparameter as x-axis + y=[ymax - ymin], + base=ymin, + name=f"{hyper} (Thresh {t})" if j == 0 else "", # Legend only for first threshold + showlegend=(j == 0), + marker_color=colors[j], + opacity=0.8, + width=1.0 + ) + ) - ymin = min(y) - ymax = max(y) - f.add_trace( - go.Bar( - x=[0], - y=[ymax - ymin], - base=ymin, - name=hyper - ) - ) - - #scatter(f, x, y, s, legend=env_name) - - x = np.array(x) - s = np.array(s) - y = np.array(y) - x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) - s = np.ones_like(x_smooth) - - #plot_lines(f, [x_smooth], [y_smooth]) - #scatter(f, x_smooth, y_smooth, 'red', legend=env_name) - - #scatter(f, v, s, ss, legend=env_name) - #scatter(f, x, y, s, legend=env_name) - #figs.append(f) - - #pareto_steps, pareto_costs, pareto_scores = pareto_points(steps, costs, scores) - #plot_lines(fig3, [pareto_steps], [pareto_costs]) return f - - df = data_options[selected_dataset] - fig = px.scatter(df, x="x", y="y", title=f"Scatter Plot for {selected_dataset}") - return fig - - - # Set layout with static graph #app.layout = layout if __name__ == '__main__': From 82f2e930cc0b8340e415e786c4938e24f27df569 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 30 Sep 2025 17:42:55 +0000 Subject: [PATCH 020/188] decent prototype --- assets/dash.css | 22 +++ plottest.py | 353 +++++++++++++++++++++++++---------- pufferlib/config/default.ini | 8 +- 3 files changed, 277 insertions(+), 106 deletions(-) create mode 100644 assets/dash.css diff --git a/assets/dash.css b/assets/dash.css new file mode 100644 index 000000000..fa03745b9 --- /dev/null +++ b/assets/dash.css @@ -0,0 +1,22 @@ +:root { + --font-color: #F1F1F1; + --dropdown-bg: #005050; +} + +body { + background-color: black !important; + color: var(--font-color) !important; +} + +.rc-slider-mark-text { + color: var(--font-color) !important; +} + +.Select-control, .Select-menu-outer, .Select-value-label, 
.Select-option { + color: var(--font-color) !important; + background-color: var(--dropdown-bg) !important; +} + +h1, h2, h3, h4, h5, h6 { + color: var(--font-color) !important; +} diff --git a/plottest.py b/plottest.py index bf4a689db..6284f88ba 100644 --- a/plottest.py +++ b/plottest.py @@ -220,11 +220,17 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial if len(filtered['environment/score']) < 5: continue # Skip - x = filtered['agent_steps'] + steps = filtered['agent_steps'] + # Adjust steps + if 'env/frameskip' in experiments: + skip = experiments['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] + + x = steps y = filtered['cost'] s = np.ones_like(x) - fx, fy, fs = pareto_points(x, y, s, 0.1) + fx, fy, fs = pareto_points(x, y, s, 0.05) fx = np.array(fx) fy = np.array(fy) @@ -368,7 +374,15 @@ def load_sweep_data(path): for kk in list(data.keys()): if kk in exp: - data[kk].append(exp[kk]) + v = exp[kk] + sweep_key = f'sweep/{kk}/distribution' + if sweep_key in data and exp[sweep_key] == 'logit_normal': + v = 1 - v + elif kk in ('train/vtrace_rho_clip', 'train/vtrace_c_clip'): + # Temporary hack for bad bounds + v = max(v, 0.1) + + data[kk].append(v) else: data[kk].append(exp['data'][-1][kk]) @@ -548,71 +562,123 @@ def loess_fit(x, s, y, n_bins=20, frac=0.4): # Initialize Dash app app = Dash() +app.css.append_css({'external_stylesheets': 'dash.css'}) app.layout = html.Div([ html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), - #dcc.Graph(figure=step_cost), - #dcc.Graph(figure=step_score), - #dcc.Graph(figure=cost_score), html.Br(), - dcc.Slider( - id='pareto-slider', - min=0.0, - max=1.0, - step=0.05, - value=0.95, - marks={i: str(0.05*i) for i in range(0, 21)} - ), + html.Label([ + "Score Threshold %: ", + dcc.Slider( + id='pareto-slider', + min=0.0, + max=1.0, + step=0.05, + value=0.95, + marks={i: str(0.05*i) for i in range(0, 21)}, + ) + ]), dcc.Graph(id='pareto'), - dcc.Slider( - 
id='hyper-stable-slider', - min=0.0, - max=1.0, - step=0.05, - value=0.95, - marks={i: str(0.05*i) for i in range(0, 21)} - ), - dcc.Graph(id='hyper-stable'), - dcc.Slider( - id='hyper-agg-slider', - min=0.0, - max=1.0, - step=0.05, - value=0.95, - marks={i: str(0.05*i) for i in range(0, 21)} - ), + html.Br(), + html.Label([ + "Score Threshold %: ", + dcc.Slider( + id='hyper-box-thresh', + min=0.0, + max=1.0, + step=0.05, + value=0.95, + marks={i: str(0.05*i) for i in range(0, 21)} + ) + ]), + html.Label([ + "Bins: ", + dcc.Slider( + id='hyper-box-buckets', + min=1, + max=10, + step=1, + value=5, + marks={i: str(i) for i in range(0, 11)} + ) + ]), + html.Label([ + "X Axis: ", + dcc.Dropdown( + id="hyper-box-x", + options=[{"label": key, "value": key} for key in ['cost', 'agent_steps']], + value="agent_steps", + style={"width": "50%"} + ) + ]), + dcc.Graph(id='hyper-box'), + html.Br(), + html.Label([ + "Score Threshold %: ", + dcc.Slider( + id='hyper-agg-slider', + min=0.0, + max=1.0, + step=0.05, + value=0.95, + marks={i: str(0.05*i) for i in range(0, 21)} + ) + ]), + html.Label([ + "Steps Interval: ", + dcc.RangeSlider( + id='hyper-agg-range', + min=0.0, + max=1.0, + step=0.1, + value=[0.0, 1.0] + ) + ]), dcc.Graph(id='hyper-agg'), - dcc.Dropdown( - id="scatter-dropdown-x", - options=[{"label": key, "value": key} for key in ALL_KEYS], - value="agent_steps", - style={"width": "50%"} - ), - dcc.Dropdown( - id="scatter-dropdown-y", - options=[{"label": key, "value": key} for key in ALL_KEYS], - value="environment/score", - style={"width": "50%"} - ), + html.Br(), + html.Label([ + "Environment: ", + dcc.Dropdown( + id="scatter-dropdown-env", + options=[{"label": key, "value": key} for key in env_names], + value="breakout", + style={"width": "50%"} + ) + ]), + html.Label([ + "X: ", + dcc.Dropdown( + id="scatter-dropdown-x", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="agent_steps", + style={"width": "50%"} + ) + ]), + html.Label([ + "Y: ", + 
dcc.Dropdown( + id="scatter-dropdown-y", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="environment/score", + style={"width": "50%"} + ) + ]), dcc.Graph(id='scatter') -]) +], +style={"width": 1280} +) @app.callback( Output("pareto", "figure"), Input("pareto-slider", "value") ) def update_pareto_plot(thresh): - f = figure(title='Sweep', xlabel='Steps', ylabel='Cost', legend='Trial') + f = figure(title='Compute/Data Pareto Front', xlabel='Steps', ylabel='Cost', legend='Trial') for i, env in enumerate(EXPERIMENTS): steps = EXPERIMENTS[env]['agent_steps'] costs = EXPERIMENTS[env]['cost'] scores = EXPERIMENTS[env]['environment/score'] - # Adjust steps - if 'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - # Header plot #if key == 'front': plot_quantiles(f, EXPERIMENTS[env], thresh, xlabel='Steps', legend=env, log_x=False, i=i) @@ -629,22 +695,23 @@ def update_pareto_plot(thresh): @app.callback( Output("scatter", "figure"), + Input("scatter-dropdown-env", "value"), Input("scatter-dropdown-x", "value"), Input("scatter-dropdown-y", "value") ) -def update_scatter(xkey, ykey): - for i, env in enumerate(EXPERIMENTS): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] +def update_scatter(env, xkey, ykey): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] + # TODO: This is not applying frameskip # Adjust steps if 'env/frameskip' in EXPERIMENTS[env]: skip = EXPERIMENTS[env]['env/frameskip'] steps = [n*m for n, m in zip(steps, skip)] - f = figure(title='Scatter', xlabel='Steps', ylabel='Hyper', legend='Ablate') - f.update_yaxes(type='log') + f = figure(title='Experiments', xlabel=xkey, ylabel=ykey, legend='Ablate') + #f.update_yaxes(type='log') x = EXPERIMENTS[env][xkey] y = EXPERIMENTS[env][ykey] @@ -749,6 +816,93 @@ def 
update_hyper_stable(thresh): return f +@app.callback( + Output("hyper-box", "figure"), + Input("hyper-box-thresh", "value"), + Input("hyper-box-buckets", "value"), + Input("hyper-box-x", "value") +) +def update_hyper_box(thresh, buckets, x): + # Initialize data storage + env_data = {} + + # Process each environment + for env in EXPERIMENTS: + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] + + # Adjust steps if frameskip exists + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] + + # Filter by score threshold + max_score = max(scores) + idxs = [i for i, s in enumerate(scores) if s > thresh*max_score] + + # Select x-axis data based on input + x_data = costs if x == 'cost' else steps + filtered_x = [x_data[i] for i in idxs] + + # Get all hyperparameters + + # Store filtered data for this environment + hyper_data = {} + env_data[env] = {'x': filtered_x, 'hypers': hyper_data} + for h in HYPERS: + hyper_data[h] = [EXPERIMENTS[env][h][i] for i in idxs] + + # Create buckets + all_x = [x for env in env_data for x in env_data[env]['x']] + x_min, x_max = min(all_x), max(all_x) + bucket_edges = np.linspace(x_min, x_max, buckets + 1) + bucket_centers = (bucket_edges[:-1] + bucket_edges[1:]) / 2 + + # Initialize heatmap data + heatmap_data = np.zeros((len(HYPERS), buckets)) + + # Compute means for each bucket and hyperparameter + for i, hyper in enumerate(HYPERS): + for j in range(buckets): + bucket_means = [] + for env in env_data: + if hyper not in env_data[env]['hypers']: + continue + + x_vals = np.array(env_data[env]['x']) + hyper_vals = np.array(env_data[env]['hypers'][hyper]) + # Find indices in current bucket + idxs = (x_vals >= bucket_edges[j]) & (x_vals < bucket_edges[j+1]) + if np.any(idxs): + bucket_means.append(np.mean(hyper_vals[idxs])) + + # Average across environments + heatmap_data[i, j] = 
np.mean(bucket_means) if bucket_means else np.nan + + heatmap_data = np.log(heatmap_data) + + # Create heatmap + f = figure(title="Hyperparameter Drift", + xlabel=x.capitalize(), + ylabel="Hyperparameters" + ) + + f.add_trace( + go.Heatmap( + x=bucket_centers, + y=HYPERS, + z=heatmap_data, + colorscale='Viridis', + showscale=True, + zmin=np.nanmin(heatmap_data), + zmax=np.nanmax(heatmap_data), + colorbar=dict(title="Value") + ) + ) + + return f + @app.callback( Output("hyper", "figure"), Input("hyper-dropdown", "value") @@ -793,25 +947,16 @@ def update_hyper_plot(hyper): @app.callback( Output("hyper-agg", "figure"), - Input("hyper-agg-slider", "value") + Input("hyper-agg-slider", "value"), + Input("hyper-agg-range", "value") ) -def update_hyper_agg_plot(thresh): - for i, env in enumerate(EXPERIMENTS): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] - - # Adjust steps - if 'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - +def update_hyper_agg_plot(thresh, step_range): # Initialize figure f = go.Figure() f.update_layout( - title=dict(text='Bar', font=TITLE_FONT), - xaxis=dict(title=dict(text='Hyper', font=AXIS_FONT), tickfont=TICK_FONT), - yaxis=dict(title=dict(text='Value', font=AXIS_FONT), tickfont=TICK_FONT), + title=dict(text='Hyperparameter Stable Range', font=TITLE_FONT), + xaxis=dict(title=dict(text='Value', font=AXIS_FONT), tickfont=TICK_FONT), + yaxis=dict(title=dict(text='Hyper', font=AXIS_FONT), tickfont=TICK_FONT), showlegend=True, legend=dict(font=LEGEND_FONT), plot_bgcolor=PLOT_BG_COLOR, @@ -819,43 +964,47 @@ def update_hyper_agg_plot(thresh): width=1280, height=720, autosize=False, - yaxis_type='log', + xaxis_type='log', barmode='overlay', # Overlay bars instead of stacking - xaxis_tickangle=45 # Rotate x-axis labels for readability ) f.update_xaxes(showgrid=False) f.update_yaxes(showgrid=False) - # 
Filter by score - max_score = max(scores) - thresholds = [0.95, 0.9, 0.75, 0.5, 0.25, 0.05][::-1] # Reversed for 0.95 on top - colors = ['indigo', 'blue', 'green', 'yellow', 'orange', 'red'][::-1] - widths = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3][::-1] # Narrower bars for higher thresholds + for i, env in enumerate(EXPERIMENTS): + steps = EXPERIMENTS[env]['agent_steps'] + costs = EXPERIMENTS[env]['cost'] + scores = EXPERIMENTS[env]['environment/score'] - for j, t in enumerate(thresholds): - idxs = [i for i, s in enumerate(scores) if s > t*max_score] + # Adjust steps + if 'env/frameskip' in EXPERIMENTS[env]: + skip = EXPERIMENTS[env]['env/frameskip'] + steps = [n*m for n, m in zip(steps, skip)] + + max_score = max(scores) + max_steps = max(steps) + n = len(scores) + idxs = [i for i in range(n) if scores[i] > thresh*max_score and + step_range[0] Date: Tue, 30 Sep 2025 18:09:51 +0000 Subject: [PATCH 021/188] Update configs --- pufferlib/config/ocean/breakout.ini | 17 ++++++++++------- pufferlib/config/ocean/tetris.ini | 24 +++++++++++++++++++++--- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index fbe9c6722..a4b3698a6 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -5,12 +5,14 @@ policy_name = Policy rnn_name = Recurrent [vec] -#num_envs = 8 -num_envs = 1 +#num_envs = 1 +num_envs = 4 + +# Experiment - fewer cores per env to avoid clogging [env] -#num_envs = 64 -num_envs = 512 +num_envs = 64 +#num_envs = 512 frameskip = 1 width = 576 height = 330 @@ -58,9 +60,10 @@ max_cost = 240 [sweep.train.total_timesteps] distribution = log_normal -min = 1e6 -max = 1e8 -mean = 5e7 +min = 2e7 +max = 9e7 +# Experiment: up from 6e7 max +mean = 4e7 scale = auto [sweep.policy.hidden_size] diff --git a/pufferlib/config/ocean/tetris.ini b/pufferlib/config/ocean/tetris.ini index 0391ca317..a07093e60 100644 --- a/pufferlib/config/ocean/tetris.ini +++ 
b/pufferlib/config/ocean/tetris.ini @@ -5,12 +5,15 @@ policy_name = Policy rnn_name = Recurrent [vec] -num_envs = 8 +num_envs = 4 [env] num_envs = 1024 deck_size = 3 +[policy] +hidden_size = 128 + [train] total_timesteps = 2_000_000_000 batch_size = auto @@ -20,10 +23,25 @@ minibatch_size = 32768 [sweep] metric = score goal = maximize +max_cost = 300 [sweep.train.total_timesteps] distribution = log_normal -min = 2e8 -max = 4e8 +min = 1e7 +max = 1e9 mean = 3e8 scale = auto + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto From 352e0cc2c89fbe9a9501a94f29f7dbb9f5e98a40 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 30 Sep 2025 18:24:33 +0000 Subject: [PATCH 022/188] configs --- pufferlib/config/ocean/grid.ini | 23 +++++++++++++++++------ pufferlib/config/ocean/pong.ini | 12 ++++++------ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/pufferlib/config/ocean/grid.ini b/pufferlib/config/ocean/grid.ini index 65bd540b6..e28885c2b 100644 --- a/pufferlib/config/ocean/grid.ini +++ b/pufferlib/config/ocean/grid.ini @@ -7,10 +7,6 @@ rnn_name = Recurrent [policy] hidden_size = 512 -[rnn] -input_size = 512 -hidden_size = 512 - [vec] #num_envs = 8 num_envs = 1 @@ -63,10 +59,25 @@ vtrace_rho_clip = 4.7398234531013985 [sweep] downsample = 0 +max_cost = 300 [sweep.train.total_timesteps] distribution = log_normal -min = 3e8 -max = 6e8 +min = 1e7 +max = 1e9 mean = 3e8 scale = time + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto diff --git a/pufferlib/config/ocean/pong.ini b/pufferlib/config/ocean/pong.ini index 1d3cf28e6..cdb4f1df9 100644 --- a/pufferlib/config/ocean/pong.ini +++ b/pufferlib/config/ocean/pong.ini @@ -68,12 +68,12 @@ max = 1024 mean 
= 128 scale = auto -#[sweep.train.total_timesteps] -#distribution = log_normal -#min = 5e5 -#max = 12e6 -#mean = 1e7 -#scale = auto +[sweep.train.total_timesteps] +distribution = log_normal +min = 5e5 +max = 5e6 +mean = 1e6 +scale = auto [sweep.env.num_envs] distribution = uniform_pow2 From 6bb09d11df82ef5d4d27ff8248f3b3cb800069e8 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 30 Sep 2025 19:08:52 +0000 Subject: [PATCH 023/188] Simplify --- plottest.py | 482 +++------------------------------------------------- 1 file changed, 25 insertions(+), 457 deletions(-) diff --git a/plottest.py b/plottest.py index 6284f88ba..56ad5dc0b 100644 --- a/plottest.py +++ b/plottest.py @@ -17,32 +17,26 @@ PAPER_BG_COLOR = '#061a1a' LINE_WIDTH = 4 LINE_COLORS = ["#0000b3", "#0010d9", "#0020ff", "#0040ff", "#0060ff", "#0080ff", "#009fff", "#00bfff", "#00ffff"][::-1] -#['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] - TITLE_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_TITLE, color=FONT_COLOR ) - AXIS_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_AXIS, color=FONT_COLOR ) - TICK_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_TICK, color=FONT_COLOR ) - LEGEND_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_LEGEND, color=FONT_COLOR ) - HYPERS = [ 'train/learning_rate', 'train/ent_coef', @@ -66,46 +60,12 @@ 'env/frameskip', 'env/num_envs', ] - ALL_KEYS = [ 'agent_steps', 'cost', 'environment/score' ] + HYPERS -def rgba(hex, alpha): - return f"rgba({int(hex[1:3], 16)}, {int(hex[3:5], 16)}, {int(hex[5:7], 16)}, {alpha})" - -def band(experiments, key, qmin=0.0, qmax=1.0): - mmax = np.array(experiments[key]).max() - top = qmax * mmax - bot = qmin * mmax - filtered = {k: [] for k in experiments} - for i, score in enumerate(experiments[key]): - if score < bot or score > top: - continue - - for k, v in experiments.items(): - filtered[k].append(v[i]) - - return filtered - -def mean_conf(xx, yy): - x_min = min([min(x) for x 
in xx]) - x_max = max([max(x) for x in xx]) - y_min = min([min(y) for y in yy]) - y_max = max([max(y) for y in yy]) - - x = np.linspace(x_min, x_max, 100) - y_interps = np.stack([ - np.interp(x, x_, y_) for x_, y_ in zip(xx, yy)]) - - mean = np.mean(y_interps, axis=0) - std = np.std(y_interps, axis=0) - conf = 1.96 * std / np.sqrt(len(xx)) - - return x, mean, conf - def figure(title='The Puffer Frontier Project', xlabel='Uptime', ylabel='Score', legend='Trial', xaxis_type='linear'): @@ -127,26 +87,9 @@ def figure(title='The Puffer Frontier Project', fig.update_yaxes(showgrid=False) return fig -def plot_lines(fig, xx, yy, name='Trial'): - for i, (x, y) in enumerate(zip(xx, yy)): - fig.add_trace( - go.Scatter( - x=x, - y=y, - mode='lines', - name=name, - line=dict( - color=LINE_COLORS[i % len(LINE_COLORS)], - width=LINE_WIDTH - ) - ) - ) - def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0, showlegend=True): mmin = min(c) mmax = max(c) - #vals = [(c - mmin)/(mmax - mmin) for c in c] - #vals = [max(0.01, v) for v in vals] if isinstance(c, str): colors = c @@ -162,7 +105,6 @@ def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0, showlegend=True): v = 0.001 colors.append(f'rgb(0, 0.5, {v})') - #c = (np.array(c) - min(c))/(max(c) - min(c)) fig.add_trace( go.Scatter( x=x, @@ -177,37 +119,21 @@ def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0, showlegend=True): ) ) -def plot_group(fig, xx, yy, xlabel='Performance', legend='Trial', log_x=False, i=0): - x, mean, conf = mean_conf(xx, yy) - fig.add_trace( - go.Scatter( - x=np.concatenate([x, x[::-1]]), - y=np.concatenate([mean + conf, (mean - conf)[::-1]]), - fill='toself', - fillcolor=rgba(LINE_COLORS[i], 0.2), - line=dict( - color='rgba(255,255,255,0)', - width=LINE_WIDTH - ), - showlegend=False - ) - ) - fig.add_trace( - go.Scatter( - x=x, - y=mean, - mode='lines', - name=legend, - line=dict( - color=LINE_COLORS[i], - width=LINE_WIDTH - ) - ) - ) +def band(experiments, key, qmin=0.0, qmax=1.0): + 
mmax = np.array(experiments[key]).max() + top = qmax * mmax + bot = qmin * mmax + filtered = {k: [] for k in experiments} + for i, score in enumerate(experiments[key]): + if score < bot or score > top: + continue + + for k, v in experiments.items(): + filtered[k].append(v[i]) + + return filtered def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial', log_x=False, i=0): - # Define quantile thresholds (in descending order for proper range filtering) - #quantile_thresholds = [1.0, 0.95, 0.9, 0.75, 0.5, 0.25, 0.05] quantile_thresholds = [1.0, thresh] colors = [['indigo', 'blue', 'green', 'yellow', 'orange', 'red'][i]] @@ -228,33 +154,16 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial x = steps y = filtered['cost'] - s = np.ones_like(x) - - fx, fy, fs = pareto_points(x, y, s, 0.05) - - fx = np.array(fx) - fy = np.array(fy) - fs = np.array(fs) - - px, py, y_lower, y_upper = loess_fit(fx, fs, fy, n_bins=10, frac=0.4) - - #x_q = x[mask] - #y_q = y[mask] + s = filtered['environment/score'] - # Compute moving average for center line - #y_mean = np.convolve(y_q, np.ones(window_size)/window_size, mode='valid') - # Adjust x_q to match the length of y_mean (trim edges due to convolution) - #trim = (window_size - 1) // 2 - #x_mean = x_q[trim:len(x_q)-trim] - - #if len(x_mean) <= 1: - # continue # Skip if not enough points after trimming + s_one = np.ones_like(x) + fx, fy, fs = pareto_points(x, y, s_one, 0.00) # Plot center line fig.add_trace( go.Scatter( - x=px, - y=py, + x=fx, + y=fy, mode='lines', name=f'{legend} Q{qmin:.2f}', line=dict( @@ -267,8 +176,8 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial # Plot scatter for points in this quantile range fig.add_trace( go.Scatter( - x=fx, - y=fy, + x=x, + y=y, mode='markers', name=f'{legend} Q{qmin:.2f} Points', marker=dict( @@ -309,45 +218,6 @@ def pareto_points(steps, costs, scores, soft=0.0): pareto_scores = [pareto_scores[i] for i 
in idxs] return pareto_steps, pareto_costs, pareto_scores -def load_seed_data(filename): - with open(filename, 'r') as f: - experiments = json.load(f) - - all_uptime = [] - all_perf = [] - for trial in experiments: - uptime = [] - perf = [] - for e in trial: - u = e['uptime'] - if 'environment/perf' not in e: - continue - - uptime.append(u) - perf.append(e['environment/perf']) - - all_uptime.append(uptime) - all_perf.append(perf) - - return all_uptime, all_perf - -def load_hyper_data(filename): - with open(filename, 'r') as f: - experiments = json.load(f) - - all_hyper = [] - all_perf = [] - for trial in experiments: - hyper = trial[-1]['learning_rate'] - perf = trial[-1]['environment/perf'] - all_hyper.append(hyper) - all_perf.append(perf) - - all_hyper = np.array(all_hyper).reshape(3, -1) - all_perf = np.array(all_perf).reshape(3, -1) - return all_hyper, all_perf - - def load_sweep_data(path): data = {} keys = None @@ -400,160 +270,6 @@ def cached_sweep_load(path): return data - ''' - # Step 1: Check if cache exists; if not, create it using load_sweep_data - if not os.path.exists(cache_file): - experiments = load_sweep_data(os.path.join(path, '*.json')) - # Create cache as list of [filename, data] pairs - cache_data = [ - [os.path.basename(fpath), exp] - for fpath, exp in zip(glob.glob(os.path.join(path, '*.json')), experiments) - if not fpath.endswith('cache.json') # Exclude cache file itself - ] - # Write cache - with open(cache_file, 'w') as f: - json.dump(cache_data, f) - - # Step 2: Load existing cache - with open(cache_file, 'r') as f: - cache_data = json.load(f) - - # Convert cache to dict: {filename: data} - cache_dict = {item[0]: item[1] for item in cache_data} - - # Get current files in directory (excluding cache.json) - current_files = set( - os.path.basename(f) for f in glob.glob(os.path.join(path, '*.json')) - if not f.endswith('cache.json') - ) - cached_files = set(cache_dict.keys()) - - # Step 3: Check for new files not in cache - new_files = 
current_files - cached_files - if new_files: - # Load only new files using a modified load_sweep_data - new_experiments = [] - new_file_paths = [os.path.join(path, fname) for fname in new_files] - for fpath in new_file_paths: - with open(fpath, 'r') as f: - exp = json.load(f) - data = {} - for kk, vv in exp.items(): - if kk == 'data': - for k, v in exp[kk][-1].items(): - data[k] = v - else: - data[kk] = vv - new_experiments.append([os.path.basename(fpath), data]) - - # Update cache with new experiments - cache_data.extend(new_experiments) - # Write updated cache - with open(cache_file, 'w') as f: - json.dump(cache_data, f) - - return [e[1] for e in cache_data] - return cache_data - - # Rebuild cache_dict to include any new files - cache_dict = {item[0]: item[1] for item in cache_data} - - # Return cached data as a dictionary - return cache_dict - ''' - - -from statsmodels.nonparametric.smoothers_lowess import lowess -def compute_bin_stats(x, s, y, n_bins=20, overlap=0.5, score_threshold=0.95): - """ - Bin data, select high-performing points, and compute weighted stats. 
- x: steps (log-scaled compute/samples), s: scores, y: hyperparameter values - """ - # Log-scale x to handle compute/samples range - x_log = np.log10(x) - x_min, x_max = x_log.min(), x_log.max() - bin_width = (x_max - x_min) / (n_bins * (1 - overlap)) - bin_centers = np.linspace(x_min, x_max, n_bins) - - y_weighted = [] - y_lower = [] - y_upper = [] - - for center in bin_centers: - # Define bin boundaries with overlap - bin_start = center - bin_width / 2 - bin_end = center + bin_width / 2 - mask = (x_log >= bin_start) & (x_log <= bin_end) - - # Select high-performing points (scores within 95% of max in bin) - bin_s = s[mask] - if len(bin_s) == 0: - y_weighted.append(np.nan) - y_lower.append(np.nan) - y_upper.append(np.nan) - continue - s_max = bin_s.max() - high_perf_mask = bin_s >= score_threshold * s_max - - # Compute weighted mean and quantiles for y - bin_y = y[mask][high_perf_mask] - bin_s = bin_s[high_perf_mask] - if len(bin_y) == 0: - y_weighted.append(np.nan) - y_lower.append(np.nan) - y_upper.append(np.nan) - continue - weights = bin_s / bin_s.sum() # Normalize scores as weights - y_mean = np.average(bin_y, weights=weights) - y_quantiles = np.percentile(bin_y, [25, 75]) # IQR for stability range - - y_weighted.append(y_mean) - y_lower.append(y_quantiles[0]) - y_upper.append(y_quantiles[1]) - - return bin_centers, np.array(y_weighted), np.array(y_lower), np.array(y_upper) - -def loess_fit(x, s, y, n_bins=20, frac=0.4): - """ - Perform LOESS fit on binned data for smoothed curve and ribbons. 
- """ - # Compute bin statistics - bin_centers, y_weighted, y_lower, y_upper = compute_bin_stats(x, s, y, n_bins=n_bins) - - # Remove NaNs for LOESS - valid_mask = ~np.isnan(y_weighted) - bin_centers = bin_centers[valid_mask] - y_weighted = y_weighted[valid_mask] - y_lower = y_lower[valid_mask] - y_upper = y_upper[valid_mask] - - # Apply LOESS to weighted mean, lower, and upper bounds - smoothed_y = lowess(y_weighted, bin_centers, frac=frac, return_sorted=True) - smoothed_lower = lowess(y_lower, bin_centers, frac=frac, return_sorted=True) - smoothed_upper = lowess(y_upper, bin_centers, frac=frac, return_sorted=True) - - # Convert back to original x scale - x_smooth = 10 ** smoothed_y[:, 0] # Undo log-scale - y_smooth = smoothed_y[:, 1] - y_smooth_lower = smoothed_lower[:, 1] - y_smooth_upper = smoothed_upper[:, 1] - - return x_smooth, y_smooth, y_smooth_lower, y_smooth_upper - -#fig1 = figure(title='Hyperparameter Ablation', xlabel='Learning Rate', legend='Ablate', xaxis_type='log') -#all_hyper, all_perf = load_hyper_data('puffer_pong_learning_rate.npz') -#plot_group(fig1, all_hyper, all_perf, legend='Pong') -#all_hyper, all_perf = load_hyper_data('puffer_breakout_learning_rate.npz') -#plot_group(fig1, all_hyper, all_perf, legend='Breakout', i=1) - -#fig2 = figure(title='Seed Sensitivity', xlabel='Uptime', legend='Ablate') -#all_uptime, all_perf = load_seed_data('puffer_pong_seeds.npz') -#plot_group(fig2, all_uptime, all_perf, legend='Pong') -#all_uptime, all_perf = load_seed_data('puffer_breakout_seeds.npz') -#plot_group(fig2, all_uptime, all_perf, legend='Breakout', i=1) -#all_uptime, all_perf = load_seed_data('puffer_connect4_seeds.npz') -#plot_group(fig2, all_uptime, all_perf, legend='Connect4', i=2) - env_names = ['breakout', 'pong'] EXPERIMENTS = { name: cached_sweep_load(f'experiments/logs/puffer_{name}') @@ -672,7 +388,8 @@ def loess_fit(x, s, y, n_bins=20, frac=0.4): Input("pareto-slider", "value") ) def update_pareto_plot(thresh): - f = 
figure(title='Compute/Data Pareto Front', xlabel='Steps', ylabel='Cost', legend='Trial') + f = figure(title='Compute/Data Pareto Front', + xlabel='Steps', ylabel='Cost', legend='Trial') for i, env in enumerate(EXPERIMENTS): steps = EXPERIMENTS[env]['agent_steps'] @@ -680,16 +397,8 @@ def update_pareto_plot(thresh): scores = EXPERIMENTS[env]['environment/score'] # Header plot - #if key == 'front': - plot_quantiles(f, EXPERIMENTS[env], thresh, xlabel='Steps', legend=env, log_x=False, i=i) - ''' - elif key == 'score': - f = figure(title='Sweep', xlabel='Steps', ylabel='Scores', legend='Trial') - scatter(f, steps, scores, costs, legend=env_name) - elif key == 'cost': - f = figure(title='Sweep', xlabel='Cost', ylabel='Scores', legend='Trial') - scatter(f, costs, scores, steps, legend=env_name) - ''' + plot_quantiles(f, EXPERIMENTS[env], thresh, + xlabel='Steps', legend=env, log_x=False, i=i) return f @@ -711,109 +420,12 @@ def update_scatter(env, xkey, ykey): steps = [n*m for n, m in zip(steps, skip)] f = figure(title='Experiments', xlabel=xkey, ylabel=ykey, legend='Ablate') - #f.update_yaxes(type='log') x = EXPERIMENTS[env][xkey] y = EXPERIMENTS[env][ykey] c = scores scatter(f, x, y, c, showlegend=False) - ''' - # Filter by score - max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] - filtered_steps = [steps[i] for i in idxs] - filtered_costs = [costs[i] for i in idxs] - filtered_scores = [scores[i] for i in idxs] - - f = figure(title='Hyper Stability', xlabel='Steps', ylabel='Hyper', legend='Ablate') - f.update_yaxes(type='log') - - for j, hyper in enumerate(HYPERS): - c = LINE_COLORS[j % len(LINE_COLORS)] - s = [scores[i] for i in idxs] - x = [steps[i] for i in idxs] - y = [EXPERIMENTS[env][hyper][i] for i in idxs] - - x, y, s = pareto_points(x, y, s, 0.1) - scatter(f, x, y, c, showlegend=False) - - x = np.array(x) - s = np.array(s) - y = np.array(y) - x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) 
- s = np.ones_like(x_smooth) - - f.add_trace( - go.Scatter( - x=x_smooth, - y=y_smooth, - mode='lines', - name=hyper, - line=dict( - color=c, - width=LINE_WIDTH - ) - ) - ) - ''' - - return f - - -@app.callback( - Output("hyper-stable", "figure"), - Input("hyper-stable-slider", "value") -) -def update_hyper_stable(thresh): - for i, env in enumerate(EXPERIMENTS): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] - - # Adjust steps - if 'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - - # Filter by score - max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] - filtered_steps = [steps[i] for i in idxs] - filtered_costs = [costs[i] for i in idxs] - filtered_scores = [scores[i] for i in idxs] - - f = figure(title='Hyper Stability', xlabel='Steps', ylabel='Hyper', legend='Ablate') - f.update_yaxes(type='log') - - for j, hyper in enumerate(HYPERS): - c = LINE_COLORS[j % len(LINE_COLORS)] - s = [scores[i] for i in idxs] - x = [steps[i] for i in idxs] - y = [EXPERIMENTS[env][hyper][i] for i in idxs] - - x, y, s = pareto_points(x, y, s, 0.1) - scatter(f, x, y, c, showlegend=False) - - x = np.array(x) - s = np.array(s) - y = np.array(y) - x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) - s = np.ones_like(x_smooth) - - f.add_trace( - go.Scatter( - x=x_smooth, - y=y_smooth, - mode='lines', - name=hyper, - line=dict( - color=c, - width=LINE_WIDTH - ) - ) - ) - return f @app.callback( @@ -903,48 +515,6 @@ def update_hyper_box(thresh, buckets, x): return f -@app.callback( - Output("hyper", "figure"), - Input("hyper-dropdown", "value") -) -def update_hyper_plot(hyper): - for i, env in enumerate(EXPERIMENTS): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] - - # Adjust steps - if 
'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - - # Filter by score - max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > 0.95*max_score] - filtered_steps = [steps[i] for i in idxs] - filtered_costs = [costs[i] for i in idxs] - filtered_scores = [scores[i] for i in idxs] - - f = figure(title=hyper, xlabel='Steps', ylabel='Hyper', legend='Ablate') - y = [EXPERIMENTS[env][hyper][i] for i in idxs] - s = [scores[i] for i in idxs] - x = [steps[i] for i in idxs] - - x, y, s = pareto_points(x, y, s, 0.1) - scatter(f, x, y, s, legend=env_name) - - x = np.array(x) - s = np.array(s) - y = np.array(y) - x_smooth, y_smooth, y_lower, y_upper = loess_fit(x, s, y, n_bins=20, frac=0.4) - s = np.ones_like(x_smooth) - - plot_lines(f, [x_smooth], [y_smooth]) - return f - -from plotly import graph_objects as go -from dash import Output, Input - @app.callback( Output("hyper-agg", "figure"), Input("hyper-agg-slider", "value"), @@ -1008,7 +578,5 @@ def update_hyper_agg_plot(thresh, step_range): return f -# Set layout with static graph -#app.layout = layout if __name__ == '__main__': - app.run(host='0.0.0.0', port=8080) + app.run(host='0.0.0.0', port=8000) From 367c8902841870310d8b952a8c2110b6b4cb94d3 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 3 Oct 2025 21:21:23 -0400 Subject: [PATCH 024/188] stable demo --- plottest.py | 356 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 242 insertions(+), 114 deletions(-) diff --git a/plottest.py b/plottest.py index 56ad5dc0b..ca948241f 100644 --- a/plottest.py +++ b/plottest.py @@ -1,6 +1,7 @@ from dash import Dash, html, dcc from dash.dependencies import Input, Output import plotly.graph_objects as go +#import plotly.express as px import numpy as np import json import glob @@ -11,12 +12,14 @@ FONT_SIZE_TITLE = 28 FONT_SIZE_AXIS = 22 FONT_SIZE_TICK = 20 +FONT_SIZE_TICK_3D = 14 FONT_SIZE_LEGEND = 18 FONT_COLOR = 
'#f1f1f1' PLOT_BG_COLOR = '#061a1a' PAPER_BG_COLOR = '#061a1a' LINE_WIDTH = 4 LINE_COLORS = ["#0000b3", "#0010d9", "#0020ff", "#0040ff", "#0060ff", "#0080ff", "#009fff", "#00bfff", "#00ffff"][::-1] +roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] TITLE_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_TITLE, @@ -32,6 +35,12 @@ size=FONT_SIZE_TICK, color=FONT_COLOR ) +GRID_COLOR = '#00f1f1' +TICK_FONT_3D = dict( + family=FONT_FAMILY, + size=FONT_SIZE_TICK_3D, + color=FONT_COLOR +) LEGEND_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_LEGEND, @@ -57,34 +66,84 @@ 'train/num_minibatches', 'train/minibatch_size', 'policy/hidden_size', - 'env/frameskip', + #'env/frameskip', 'env/num_envs', ] ALL_KEYS = [ 'agent_steps', 'cost', - 'environment/score' + 'environment/score', + 'environment/perf' ] + HYPERS def figure(title='The Puffer Frontier Project', - xlabel='Uptime', ylabel='Score', - legend='Trial', xaxis_type='linear'): + xlabel='Uptime', ylabel='Score', zlabel='Score', + legend='Trial', log_x=False, log_y=False, log_z=False, + is_3d=False): fig = go.Figure() - fig.update_layout( - title=dict(text=title, font=TITLE_FONT), - xaxis=dict(title=dict(text=xlabel, font=AXIS_FONT), tickfont=TICK_FONT), - yaxis=dict(title=dict(text=ylabel, font=AXIS_FONT), tickfont=TICK_FONT), - xaxis_type=xaxis_type, - showlegend=True, - legend=dict(font=LEGEND_FONT), - plot_bgcolor=PLOT_BG_COLOR, - paper_bgcolor=PAPER_BG_COLOR, - width=1280, - height=720, - autosize=False - ) - fig.update_xaxes(showgrid=False) - fig.update_yaxes(showgrid=False) + + # Common layout settings + layout_dict = { + 'title': dict(text=title, font=TITLE_FONT), + 'showlegend': True, + 'legend': dict(font=LEGEND_FONT), + 'plot_bgcolor': PLOT_BG_COLOR, + 'paper_bgcolor': PAPER_BG_COLOR, + 'width': 1280, + 'height': 720, + 'autosize': False + } + + if is_3d: + # 3D-specific scene configuration + layout_dict['scene'] = dict( + xaxis=dict( + title=dict(text=xlabel, font=AXIS_FONT), + 
tickfont=TICK_FONT_3D, + type='log' if log_x else 'linear', + showgrid=True, + gridcolor=GRID_COLOR, + backgroundcolor=PLOT_BG_COLOR, + zeroline=False + ), + yaxis=dict( + title=dict(text=ylabel, font=AXIS_FONT), + tickfont=TICK_FONT_3D, + type='log' if log_y else 'linear', + showgrid=True, + gridcolor=GRID_COLOR, + backgroundcolor=PLOT_BG_COLOR, + zeroline=False + ), + zaxis=dict( + title=dict(text=zlabel, font=AXIS_FONT), + tickfont=TICK_FONT_3D, + type='log' if log_z else 'linear', + showgrid=True, + gridcolor=GRID_COLOR, + backgroundcolor=PLOT_BG_COLOR, + zeroline=False + ), + bgcolor=PLOT_BG_COLOR, + ) + else: + # 2D-specific axis configuration + layout_dict.update({ + 'xaxis': dict( + title=dict(text=xlabel, font=AXIS_FONT), + tickfont=TICK_FONT, + #type='log' if log_x else 'linear', + showgrid=False # Keep grid off for 2D + ), + 'yaxis': dict( + title=dict(text=ylabel, font=AXIS_FONT), + tickfont=TICK_FONT, + #type='log' if log_y else 'linear', + showgrid=False + ) + }) + + fig.update_layout(**layout_dict) return fig def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0, showlegend=True): @@ -143,9 +202,6 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial filtered = band(experiments, 'environment/score', qmin, qmax) - if len(filtered['environment/score']) < 5: - continue # Skip - steps = filtered['agent_steps'] # Adjust steps if 'env/frameskip' in experiments: @@ -159,19 +215,20 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial s_one = np.ones_like(x) fx, fy, fs = pareto_points(x, y, s_one, 0.00) - # Plot center line - fig.add_trace( - go.Scatter( - x=fx, - y=fy, - mode='lines', - name=f'{legend} Q{qmin:.2f}', - line=dict( - color=colors[i], - width=LINE_WIDTH + if len(fx) > 1: + fig.add_trace( + go.Scatter( + x=fx, + y=fy, + mode='lines', + name=f'{legend} Q{qmin:.2f}', + line=dict( + color=colors[i], + width=LINE_WIDTH + ), + showlegend=False ) ) - ) # Plot scatter for points in this 
quantile range fig.add_trace( @@ -184,7 +241,6 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial color=colors[i], size=5 ), - showlegend=False # Hide scatter legend to avoid clutter ) ) @@ -195,6 +251,24 @@ def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial return fig +def lim_pareto(costs, scores, soft=0.0): + pareto_costs = [] + pareto_scores = [] + max_score = max(scores) + for i in range(len(scores)): + better = [scores[j] >= scores[i] and + costs[j] < costs[i] + for j in range(len(scores))] + if not any(better): + pareto_costs.append(costs[i]) + pareto_scores.append(scores[i]) + + idxs = np.argsort(pareto_scores) + pareto_costs = [pareto_costs[i] for i in idxs] + pareto_scores = [pareto_scores[i] for i in idxs] + return pareto_costs, pareto_scores + + def pareto_points(steps, costs, scores, soft=0.0): pareto_steps = [] pareto_costs = [] @@ -218,6 +292,17 @@ def pareto_points(steps, costs, scores, soft=0.0): pareto_scores = [pareto_scores[i] for i in idxs] return pareto_steps, pareto_costs, pareto_scores +def pareto_idx(steps, costs, scores): + idxs = [] + for i in range(len(steps)): + better = [scores[j] >= scores[i] and + costs[j] < costs[i] and steps[j] < steps[i] + for j in range(len(scores))] + if not any(better): + idxs.append(i) + + return idxs + def load_sweep_data(path): data = {} keys = None @@ -268,9 +353,19 @@ def cached_sweep_load(path): with open(cache_file, 'r') as f: data = json.load(f) + steps = data['agent_steps'] + costs = data['cost'] + scores = data['environment/score'] + + idxs = pareto_idx(steps, costs, scores) + for k in data: + data[k] = [data[k][i] for i in idxs] + + data['environment/perf'] = [min(e, 1.0) for e in data['environment/perf']] + return data -env_names = ['breakout', 'pong'] +env_names = ['grid', 'tetris', 'breakout', 'pong'] EXPERIMENTS = { name: cached_sweep_load(f'experiments/logs/puffer_{name}') for name in env_names @@ -282,41 +377,76 @@ def 
cached_sweep_load(path): app.layout = html.Div([ html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), html.Br(), + html.Label([ - "Score Threshold %: ", - dcc.Slider( - id='pareto-slider', - min=0.0, - max=1.0, - step=0.05, - value=0.95, - marks={i: str(0.05*i) for i in range(0, 21)}, + "X: ", + dcc.Dropdown( + id="optimal-dropdown-x", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="cost", + style={"width": "50%"} + ) + ]), + html.Label([ + "Y: ", + dcc.Dropdown( + id="optimal-dropdown-y", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="agent_steps", + style={"width": "50%"} + ) + ]), + html.Label([ + "Z: ", + dcc.Dropdown( + id="optimal-dropdown-z", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="environment/perf", + style={"width": "50%"} ) ]), - dcc.Graph(id='pareto'), + dcc.Graph(id='optimal'), html.Br(), + html.Label([ - "Score Threshold %: ", - dcc.Slider( - id='hyper-box-thresh', - min=0.0, - max=1.0, - step=0.05, - value=0.95, - marks={i: str(0.05*i) for i in range(0, 21)} + "Environment: ", + dcc.Dropdown( + id="scatter-dropdown-env", + options=[{"label": key, "value": key} for key in env_names], + value="breakout", + style={"width": "50%"} ) ]), html.Label([ - "Bins: ", - dcc.Slider( - id='hyper-box-buckets', - min=1, - max=10, - step=1, - value=5, - marks={i: str(i) for i in range(0, 11)} + "X: ", + dcc.Dropdown( + id="scatter-dropdown-x", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="cost", + style={"width": "50%"} + ) + ]), + html.Label([ + "Y: ", + dcc.Dropdown( + id="scatter-dropdown-y", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="environment/score", + style={"width": "50%"} ) ]), + html.Label([ + "Z: ", + dcc.Dropdown( + id="scatter-dropdown-z", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="agent_steps", + style={"width": "50%"} + ) + ]), + dcc.Graph(id='scatter'), + html.Br(), + 
html.Label([ "X Axis: ", dcc.Dropdown( @@ -350,65 +480,52 @@ def cached_sweep_load(path): ) ]), dcc.Graph(id='hyper-agg'), - html.Br(), - html.Label([ - "Environment: ", - dcc.Dropdown( - id="scatter-dropdown-env", - options=[{"label": key, "value": key} for key in env_names], - value="breakout", - style={"width": "50%"} - ) - ]), - html.Label([ - "X: ", - dcc.Dropdown( - id="scatter-dropdown-x", - options=[{"label": key, "value": key} for key in ALL_KEYS], - value="agent_steps", - style={"width": "50%"} - ) - ]), - html.Label([ - "Y: ", - dcc.Dropdown( - id="scatter-dropdown-y", - options=[{"label": key, "value": key} for key in ALL_KEYS], - value="environment/score", - style={"width": "50%"} - ) - ]), - dcc.Graph(id='scatter') + ], style={"width": 1280} ) @app.callback( - Output("pareto", "figure"), - Input("pareto-slider", "value") + Output("optimal", "figure"), + Input("optimal-dropdown-x", "value"), + Input("optimal-dropdown-y", "value"), + Input("optimal-dropdown-z", "value") ) -def update_pareto_plot(thresh): - f = figure(title='Compute/Data Pareto Front', - xlabel='Steps', ylabel='Cost', legend='Trial') +def update_optimal_plot(xkey, ykey, zkey): + f = figure(title='Pareto', + xlabel=xkey, ylabel=ykey, zlabel=zkey, + log_x=True, log_y=True, is_3d=True) for i, env in enumerate(EXPERIMENTS): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] + x = EXPERIMENTS[env][xkey] + y = EXPERIMENTS[env][ykey] + z = EXPERIMENTS[env][zkey] - # Header plot - plot_quantiles(f, EXPERIMENTS[env], thresh, - xlabel='Steps', legend=env, log_x=False, i=i) + f.add_trace( + go.Scatter3d( + x=x, + y=y, + z=z, + mode='markers', + name=env, + line=dict( + color=roygbiv[i], + width=LINE_WIDTH + ), + ) + ) return f + @app.callback( Output("scatter", "figure"), Input("scatter-dropdown-env", "value"), Input("scatter-dropdown-x", "value"), - Input("scatter-dropdown-y", "value") + Input("scatter-dropdown-y", 
"value"), + Input("scatter-dropdown-z", "value") ) -def update_scatter(env, xkey, ykey): +def update_scatter(env, xkey, ykey, zkey): steps = EXPERIMENTS[env]['agent_steps'] costs = EXPERIMENTS[env]['cost'] scores = EXPERIMENTS[env]['environment/score'] @@ -421,21 +538,36 @@ def update_scatter(env, xkey, ykey): f = figure(title='Experiments', xlabel=xkey, ylabel=ykey, legend='Ablate') + steps = EXPERIMENTS[env]['agent_steps'] x = EXPERIMENTS[env][xkey] y = EXPERIMENTS[env][ykey] - c = scores - scatter(f, x, y, c, showlegend=False) + z = EXPERIMENTS[env][zkey] + + mmin = min(z) + mmax = max(z) + thresh = np.geomspace(mmin, mmax, 8) + for j in range(7): + idxs = [i for i, e in enumerate(z) + if thresh[j] < e and e < thresh[j+1]] + fx = [x[i] for i in idxs] + fy = [y[i] for i in idxs] + fz = [z[i] for i in idxs] + + if len(fx) <= 2: + continue + + scatter(f, fx, fy, roygbiv[j], showlegend=False) + return f @app.callback( Output("hyper-box", "figure"), - Input("hyper-box-thresh", "value"), - Input("hyper-box-buckets", "value"), Input("hyper-box-x", "value") ) -def update_hyper_box(thresh, buckets, x): +def update_hyper_box(x): # Initialize data storage + buckets = 8 env_data = {} # Process each environment @@ -449,21 +581,16 @@ def update_hyper_box(thresh, buckets, x): skip = EXPERIMENTS[env]['env/frameskip'] steps = [n*m for n, m in zip(steps, skip)] - # Filter by score threshold - max_score = max(scores) - idxs = [i for i, s in enumerate(scores) if s > thresh*max_score] - # Select x-axis data based on input x_data = costs if x == 'cost' else steps - filtered_x = [x_data[i] for i in idxs] # Get all hyperparameters # Store filtered data for this environment hyper_data = {} - env_data[env] = {'x': filtered_x, 'hypers': hyper_data} + env_data[env] = {'x': x_data, 'hypers': hyper_data} for h in HYPERS: - hyper_data[h] = [EXPERIMENTS[env][h][i] for i in idxs] + hyper_data[h] = EXPERIMENTS[env][h] # Create buckets all_x = [x for env in env_data for x in env_data[env]['x']] 
@@ -561,6 +688,7 @@ def update_hyper_agg_plot(thresh, step_range): for k, hyper in enumerate(HYPERS): y = [EXPERIMENTS[env][hyper][i] for i in idxs] + ymin = min(y) ymax = max(y) f.add_trace( From 805f5582a5a780825e78d0ccdd5e826622de62ff Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 3 Oct 2025 21:27:44 -0400 Subject: [PATCH 025/188] Cull dead code --- plottest.py | 162 ++++++---------------------------------------------- 1 file changed, 17 insertions(+), 145 deletions(-) diff --git a/plottest.py b/plottest.py index ca948241f..ff5141eb7 100644 --- a/plottest.py +++ b/plottest.py @@ -146,138 +146,12 @@ def figure(title='The Puffer Frontier Project', fig.update_layout(**layout_dict) return fig -def scatter(fig, x, y, c, legend='Trial', log_x=False, i=0, showlegend=True): - mmin = min(c) - mmax = max(c) - - if isinstance(c, str): - colors = c - else: - colors = [] - for e in c: - if mmin != mmax: - v = (e - mmin)/(mmax - mmin) - else: - v = e - - if v < 0.001: - v = 0.001 - colors.append(f'rgb(0, 0.5, {v})') - - fig.add_trace( - go.Scatter( - x=x, - y=y, - mode='markers', - name=legend, - showlegend=showlegend, - marker=dict( - color=colors, - size=10 - ) - ) - ) - -def band(experiments, key, qmin=0.0, qmax=1.0): - mmax = np.array(experiments[key]).max() - top = qmax * mmax - bot = qmin * mmax - filtered = {k: [] for k in experiments} - for i, score in enumerate(experiments[key]): - if score < bot or score > top: - continue - - for k, v in experiments.items(): - filtered[k].append(v[i]) - - return filtered - -def plot_quantiles(fig, experiments, thresh, xlabel='Performance', legend='Trial', log_x=False, i=0): - quantile_thresholds = [1.0, thresh] - colors = [['indigo', 'blue', 'green', 'yellow', 'orange', 'red'][i]] - - for i in range(len(quantile_thresholds) - 1): - qmin = quantile_thresholds[i + 1] - qmax = quantile_thresholds[i] - - filtered = band(experiments, 'environment/score', qmin, qmax) - - steps = filtered['agent_steps'] - # Adjust steps - if 
'env/frameskip' in experiments: - skip = experiments['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - - x = steps - y = filtered['cost'] - s = filtered['environment/score'] - - s_one = np.ones_like(x) - fx, fy, fs = pareto_points(x, y, s_one, 0.00) - - if len(fx) > 1: - fig.add_trace( - go.Scatter( - x=fx, - y=fy, - mode='lines', - name=f'{legend} Q{qmin:.2f}', - line=dict( - color=colors[i], - width=LINE_WIDTH - ), - showlegend=False - ) - ) - - # Plot scatter for points in this quantile range - fig.add_trace( - go.Scatter( - x=x, - y=y, - mode='markers', - name=f'{legend} Q{qmin:.2f} Points', - marker=dict( - color=colors[i], - size=5 - ), - ) - ) - - # Update axes - fig.update_xaxes(title_text=xlabel) - if log_x: - fig.update_xaxes(type='log') - - return fig - -def lim_pareto(costs, scores, soft=0.0): - pareto_costs = [] - pareto_scores = [] - max_score = max(scores) - for i in range(len(scores)): - better = [scores[j] >= scores[i] and - costs[j] < costs[i] - for j in range(len(scores))] - if not any(better): - pareto_costs.append(costs[i]) - pareto_scores.append(scores[i]) - - idxs = np.argsort(pareto_scores) - pareto_costs = [pareto_costs[i] for i in idxs] - pareto_scores = [pareto_scores[i] for i in idxs] - return pareto_costs, pareto_scores - - def pareto_points(steps, costs, scores, soft=0.0): pareto_steps = [] pareto_costs = [] pareto_scores = [] max_score = max(scores) for i in range(len(steps)): - #if scores[i] < 0.25*max_score: - # continue - better = [scores[j] >= scores[i] and costs[j] < costs[i]*(1 - soft) and steps[j] < steps[i]*(1 - soft) for j in range(len(scores))] @@ -362,7 +236,11 @@ def cached_sweep_load(path): data[k] = [data[k][i] for i in idxs] data['environment/perf'] = [min(e, 1.0) for e in data['environment/perf']] - + + if 'env/frameskip' in data: + skip = data['env/frameskip'] + data['agent_steps'] = [n*m for n, m in zip(data['agent_steps'], skip)] + return data env_names = ['grid', 'tetris', 'breakout', 'pong'] @@ 
-530,12 +408,6 @@ def update_scatter(env, xkey, ykey, zkey): costs = EXPERIMENTS[env]['cost'] scores = EXPERIMENTS[env]['environment/score'] - # TODO: This is not applying frameskip - # Adjust steps - if 'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - f = figure(title='Experiments', xlabel=xkey, ylabel=ykey, legend='Ablate') steps = EXPERIMENTS[env]['agent_steps'] @@ -556,8 +428,18 @@ def update_scatter(env, xkey, ykey, zkey): if len(fx) <= 2: continue - scatter(f, fx, fy, roygbiv[j], showlegend=False) - + f.add_trace( + go.Scatter( + x=fx, + y=fy, + mode='markers', + showlegend=False, + marker=dict( + color=roygbiv[j], + size=10 + ) + ) + ) return f @@ -576,11 +458,6 @@ def update_hyper_box(x): costs = EXPERIMENTS[env]['cost'] scores = EXPERIMENTS[env]['environment/score'] - # Adjust steps if frameskip exists - if 'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - # Select x-axis data based on input x_data = costs if x == 'cost' else steps @@ -672,11 +549,6 @@ def update_hyper_agg_plot(thresh, step_range): costs = EXPERIMENTS[env]['cost'] scores = EXPERIMENTS[env]['environment/score'] - # Adjust steps - if 'env/frameskip' in EXPERIMENTS[env]: - skip = EXPERIMENTS[env]['env/frameskip'] - steps = [n*m for n, m in zip(steps, skip)] - max_score = max(scores) max_steps = max(steps) n = len(scores) From 2ea0cb114d544708ccdc9a2fce1394167da28984 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 3 Oct 2025 22:00:58 -0400 Subject: [PATCH 026/188] Constellation initial prototype --- plottest.py => constellation.py | 295 ++++++++++++-------------------- 1 file changed, 108 insertions(+), 187 deletions(-) rename plottest.py => constellation.py (72%) diff --git a/plottest.py b/constellation.py similarity index 72% rename from plottest.py rename to constellation.py index ff5141eb7..17eb6c53a 100644 --- a/plottest.py +++ 
b/constellation.py @@ -1,13 +1,13 @@ from dash import Dash, html, dcc from dash.dependencies import Input, Output +import pandas as pd import plotly.graph_objects as go -#import plotly.express as px +import plotly.express as px import numpy as np import json import glob import os -# Global styling variables FONT_FAMILY = 'Arial' FONT_SIZE_TITLE = 28 FONT_SIZE_AXIS = 22 @@ -66,7 +66,6 @@ 'train/num_minibatches', 'train/minibatch_size', 'policy/hidden_size', - #'env/frameskip', 'env/num_envs', ] ALL_KEYS = [ @@ -76,100 +75,10 @@ 'environment/perf' ] + HYPERS -def figure(title='The Puffer Frontier Project', - xlabel='Uptime', ylabel='Score', zlabel='Score', - legend='Trial', log_x=False, log_y=False, log_z=False, - is_3d=False): - fig = go.Figure() - - # Common layout settings - layout_dict = { - 'title': dict(text=title, font=TITLE_FONT), - 'showlegend': True, - 'legend': dict(font=LEGEND_FONT), - 'plot_bgcolor': PLOT_BG_COLOR, - 'paper_bgcolor': PAPER_BG_COLOR, - 'width': 1280, - 'height': 720, - 'autosize': False - } - - if is_3d: - # 3D-specific scene configuration - layout_dict['scene'] = dict( - xaxis=dict( - title=dict(text=xlabel, font=AXIS_FONT), - tickfont=TICK_FONT_3D, - type='log' if log_x else 'linear', - showgrid=True, - gridcolor=GRID_COLOR, - backgroundcolor=PLOT_BG_COLOR, - zeroline=False - ), - yaxis=dict( - title=dict(text=ylabel, font=AXIS_FONT), - tickfont=TICK_FONT_3D, - type='log' if log_y else 'linear', - showgrid=True, - gridcolor=GRID_COLOR, - backgroundcolor=PLOT_BG_COLOR, - zeroline=False - ), - zaxis=dict( - title=dict(text=zlabel, font=AXIS_FONT), - tickfont=TICK_FONT_3D, - type='log' if log_z else 'linear', - showgrid=True, - gridcolor=GRID_COLOR, - backgroundcolor=PLOT_BG_COLOR, - zeroline=False - ), - bgcolor=PLOT_BG_COLOR, - ) - else: - # 2D-specific axis configuration - layout_dict.update({ - 'xaxis': dict( - title=dict(text=xlabel, font=AXIS_FONT), - tickfont=TICK_FONT, - #type='log' if log_x else 'linear', - showgrid=False # Keep 
grid off for 2D - ), - 'yaxis': dict( - title=dict(text=ylabel, font=AXIS_FONT), - tickfont=TICK_FONT, - #type='log' if log_y else 'linear', - showgrid=False - ) - }) - - fig.update_layout(**layout_dict) - return fig - -def pareto_points(steps, costs, scores, soft=0.0): - pareto_steps = [] - pareto_costs = [] - pareto_scores = [] - max_score = max(scores) - for i in range(len(steps)): - better = [scores[j] >= scores[i] and - costs[j] < costs[i]*(1 - soft) and steps[j] < steps[i]*(1 - soft) - for j in range(len(scores))] - if not any(better): - pareto_steps.append(steps[i]) - pareto_costs.append(costs[i]) - pareto_scores.append(scores[i]) - - idxs = np.argsort(pareto_steps) - pareto_steps = [pareto_steps[i] for i in idxs] - pareto_costs = [pareto_costs[i] for i in idxs] - pareto_scores = [pareto_scores[i] for i in idxs] - return pareto_steps, pareto_costs, pareto_scores - def pareto_idx(steps, costs, scores): idxs = [] for i in range(len(steps)): - better = [scores[j] >= scores[i] and + better = [scores[j] >= scores[i] and costs[j] < costs[i] and steps[j] < steps[i] for j in range(len(scores))] if not any(better): @@ -208,7 +117,6 @@ def load_sweep_data(path): if sweep_key in data and exp[sweep_key] == 'logit_normal': v = 1 - v elif kk in ('train/vtrace_rho_clip', 'train/vtrace_c_clip'): - # Temporary hack for bad bounds v = max(v, 0.1) data[kk].append(v) @@ -249,11 +157,10 @@ def cached_sweep_load(path): for name in env_names } -# Initialize Dash app app = Dash() app.css.append_css({'external_stylesheets': 'dash.css'}) app.layout = html.Div([ - html.H1('The Puffer Frontier Project', style={'textAlign': 'center'}), + html.H1('Puffer Constellation', style={'textAlign': 'center'}), html.Br(), html.Label([ @@ -370,29 +277,57 @@ def cached_sweep_load(path): Input("optimal-dropdown-z", "value") ) def update_optimal_plot(xkey, ykey, zkey): - f = figure(title='Pareto', - xlabel=xkey, ylabel=ykey, zlabel=zkey, - log_x=True, log_y=True, is_3d=True) - - for i, env in 
enumerate(EXPERIMENTS): - x = EXPERIMENTS[env][xkey] - y = EXPERIMENTS[env][ykey] - z = EXPERIMENTS[env][zkey] - - f.add_trace( - go.Scatter3d( - x=x, - y=y, - z=z, - mode='markers', - name=env, - line=dict( - color=roygbiv[i], - width=LINE_WIDTH - ), - ) + all_x = [] + all_y = [] + all_z = [] + all_env = [] + for env in EXPERIMENTS: + all_x += EXPERIMENTS[env][xkey] + all_y += EXPERIMENTS[env][ykey] + all_z += EXPERIMENTS[env][zkey] + all_env += [env] * len(EXPERIMENTS[env][xkey]) + f = px.scatter_3d(x=all_x, y=all_y, z=all_z, color=all_env, log_x=True, log_y=True, log_z=False, color_discrete_sequence=roygbiv) + layout_dict = { + 'title': dict(text='Pareto', font=TITLE_FONT), + 'showlegend': True, + 'legend': dict(font=LEGEND_FONT), + 'plot_bgcolor': PLOT_BG_COLOR, + 'paper_bgcolor': PAPER_BG_COLOR, + 'width': 1280, + 'height': 720, + 'autosize': False, + 'scene': dict( + xaxis=dict( + title=dict(text=xkey, font=AXIS_FONT), + tickfont=TICK_FONT_3D, + type='log', + showgrid=True, + gridcolor=GRID_COLOR, + backgroundcolor=PLOT_BG_COLOR, + zeroline=False + ), + yaxis=dict( + title=dict(text=ykey, font=AXIS_FONT), + tickfont=TICK_FONT_3D, + type='log', + showgrid=True, + gridcolor=GRID_COLOR, + backgroundcolor=PLOT_BG_COLOR, + zeroline=False + ), + zaxis=dict( + title=dict(text=zkey, font=AXIS_FONT), + tickfont=TICK_FONT_3D, + type='linear', + showgrid=True, + gridcolor=GRID_COLOR, + backgroundcolor=PLOT_BG_COLOR, + zeroline=False + ), + bgcolor=PLOT_BG_COLOR, ) - + } + f.update_layout(**layout_dict) return f @@ -404,43 +339,47 @@ def update_optimal_plot(xkey, ykey, zkey): Input("scatter-dropdown-z", "value") ) def update_scatter(env, xkey, ykey, zkey): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] - - f = figure(title='Experiments', xlabel=xkey, ylabel=ykey, legend='Ablate') - - steps = EXPERIMENTS[env]['agent_steps'] x = EXPERIMENTS[env][xkey] y = EXPERIMENTS[env][ykey] z = 
EXPERIMENTS[env][zkey] - mmin = min(z) mmax = max(z) thresh = np.geomspace(mmin, mmax, 8) + all_fx = [] + all_fy = [] + bin_label = [] for j in range(7): - idxs = [i for i, e in enumerate(z) - if thresh[j] < e and e < thresh[j+1]] + idxs = [i for i, e in enumerate(z) if thresh[j] < e < thresh[j+1]] + if len(idxs) <= 2: + continue fx = [x[i] for i in idxs] fy = [y[i] for i in idxs] - fz = [z[i] for i in idxs] - - if len(fx) <= 2: - continue - - f.add_trace( - go.Scatter( - x=fx, - y=fy, - mode='markers', - showlegend=False, - marker=dict( - color=roygbiv[j], - size=10 - ) - ) + all_fx += fx + all_fy += fy + bin_label += [str(j)] * len(fx) + f = px.scatter(x=all_fx, y=all_fy, color=bin_label, color_discrete_sequence=roygbiv) + f.update_traces(marker_size=10) + layout_dict = { + 'title': dict(text='Experiments', font=TITLE_FONT), + 'showlegend': False, + 'legend': dict(font=LEGEND_FONT), + 'plot_bgcolor': PLOT_BG_COLOR, + 'paper_bgcolor': PAPER_BG_COLOR, + 'width': 1280, + 'height': 720, + 'autosize': False, + 'xaxis': dict( + title=dict(text=xkey, font=AXIS_FONT), + tickfont=TICK_FONT, + showgrid=False + ), + 'yaxis': dict( + title=dict(text=ykey, font=AXIS_FONT), + tickfont=TICK_FONT, + showgrid=False ) - + } + f.update_layout(**layout_dict) return f @app.callback( @@ -448,75 +387,57 @@ def update_scatter(env, xkey, ykey, zkey): Input("hyper-box-x", "value") ) def update_hyper_box(x): - # Initialize data storage buckets = 8 env_data = {} - - # Process each environment for env in EXPERIMENTS: steps = EXPERIMENTS[env]['agent_steps'] costs = EXPERIMENTS[env]['cost'] scores = EXPERIMENTS[env]['environment/score'] - - # Select x-axis data based on input x_data = costs if x == 'cost' else steps - - # Get all hyperparameters - - # Store filtered data for this environment hyper_data = {} env_data[env] = {'x': x_data, 'hypers': hyper_data} for h in HYPERS: hyper_data[h] = EXPERIMENTS[env][h] - - # Create buckets all_x = [x for env in env_data for x in env_data[env]['x']] 
x_min, x_max = min(all_x), max(all_x) bucket_edges = np.linspace(x_min, x_max, buckets + 1) bucket_centers = (bucket_edges[:-1] + bucket_edges[1:]) / 2 - - # Initialize heatmap data heatmap_data = np.zeros((len(HYPERS), buckets)) - - # Compute means for each bucket and hyperparameter for i, hyper in enumerate(HYPERS): for j in range(buckets): bucket_means = [] for env in env_data: if hyper not in env_data[env]['hypers']: continue - x_vals = np.array(env_data[env]['x']) hyper_vals = np.array(env_data[env]['hypers'][hyper]) - # Find indices in current bucket idxs = (x_vals >= bucket_edges[j]) & (x_vals < bucket_edges[j+1]) if np.any(idxs): bucket_means.append(np.mean(hyper_vals[idxs])) - - # Average across environments heatmap_data[i, j] = np.mean(bucket_means) if bucket_means else np.nan - heatmap_data = np.log(heatmap_data) - - # Create heatmap - f = figure(title="Hyperparameter Drift", - xlabel=x.capitalize(), - ylabel="Hyperparameters" - ) - - f.add_trace( - go.Heatmap( - x=bucket_centers, - y=HYPERS, - z=heatmap_data, - colorscale='Viridis', - showscale=True, - zmin=np.nanmin(heatmap_data), - zmax=np.nanmax(heatmap_data), - colorbar=dict(title="Value") + f = px.imshow(heatmap_data, x=bucket_centers, y=HYPERS, color_continuous_scale='Viridis', zmin=np.nanmin(heatmap_data), zmax=np.nanmax(heatmap_data), labels=dict(color="Value")) + layout_dict = { + 'title': dict(text="Hyperparameter Drift", font=TITLE_FONT), + 'showlegend': True, + 'legend': dict(font=LEGEND_FONT), + 'plot_bgcolor': PLOT_BG_COLOR, + 'paper_bgcolor': PAPER_BG_COLOR, + 'width': 1280, + 'height': 720, + 'autosize': False, + 'xaxis': dict( + title=dict(text=x.capitalize(), font=AXIS_FONT), + tickfont=TICK_FONT, + showgrid=False + ), + 'yaxis': dict( + title=dict(text="Hyperparameters", font=AXIS_FONT), + tickfont=TICK_FONT, + showgrid=False ) - ) - + } + f.update_layout(**layout_dict) return f @app.callback( @@ -569,8 +490,8 @@ def update_hyper_agg_plot(thresh, step_range): y=[hyper], # 
Hyperparameter as x-axis base=ymin, showlegend=False, - marker_color='blue', - opacity=0.5, + marker_color='#00f1f1', + opacity=0.25, width=1.0, orientation='h' ) From 8df26d507e7a84099b6e3399953356f33f3d3639 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 3 Oct 2025 22:22:14 -0400 Subject: [PATCH 027/188] Configs for sweeping --- pufferlib/config/ocean/moba.ini | 22 ++++++++++++++++++-- pufferlib/config/ocean/tower_climb.ini | 28 +++++++++++++++++++++----- pufferlib/ocean/torch.py | 4 ++-- setup.py | 2 +- 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/pufferlib/config/ocean/moba.ini b/pufferlib/config/ocean/moba.ini index 2e0e8cea3..73bcdeb68 100644 --- a/pufferlib/config/ocean/moba.ini +++ b/pufferlib/config/ocean/moba.ini @@ -12,18 +12,36 @@ reward_tower = 4.525112152099609 num_envs = 128 [vec] -num_envs = 8 +num_envs = 4 [train] total_timesteps = 150_000_000 +[sweep] +downsample = 10 +max_cost = 500 + [sweep.train.total_timesteps] distribution = log_normal min = 2e7 -max = 2e8 +max = 5e8 mean = 1e8 scale = auto +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto + [sweep.env.reward_death] distribution = uniform min = -1.0 diff --git a/pufferlib/config/ocean/tower_climb.ini b/pufferlib/config/ocean/tower_climb.ini index ce6f75d59..7fea71113 100644 --- a/pufferlib/config/ocean/tower_climb.ini +++ b/pufferlib/config/ocean/tower_climb.ini @@ -5,7 +5,7 @@ policy_name = TowerClimb rnn_name = TowerClimbLSTM [vec] -num_envs = 8 +num_envs = 4 [env] num_envs = 1024 @@ -21,12 +21,30 @@ total_timesteps = 150_000_000 #learning_rate = 0.05 minibatch_size = 32768 +[sweep] +downsample = 10 +max_cost = 500 + [sweep.train.total_timesteps] distribution = uniform -min = 50_000_000 -max = 200_000_000 -mean = 100_000_000 -scale = 0.5 +min = 2e7 +max = 1e8 +mean = 1e9 +scale = auto + 
+[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto [sweep.env.reward_climb_row] distribution = uniform diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index baeda1131..f975b5c22 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -526,8 +526,8 @@ def decode_actions(self, flat_hidden): return action, value class TowerClimbLSTM(pufferlib.models.LSTMWrapper): - def __init__(self, env, policy, input_size = 256, hidden_size = 256): - super().__init__(env, policy, input_size, hidden_size) + def __init__(self, env, policy, hidden_size = 256): + super().__init__(env, policy, hidden_size) class TowerClimb(nn.Module): def __init__(self, env, cnn_channels=16, hidden_size = 256, **kwargs): diff --git a/setup.py b/setup.py index 4b8fbb528..5514d8082 100644 --- a/setup.py +++ b/setup.py @@ -263,7 +263,7 @@ def run(self): 'rich_argparse', 'imageio', 'pyro-ppl', - 'heavyball', + 'heavyball<2.0', 'neptune', 'wandb', ] From fd710a8adb2659436276e11974d17a8a64f40bcc Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 4 Oct 2025 18:10:01 -0400 Subject: [PATCH 028/188] snake config --- pufferlib/config/ocean/snake.ini | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/pufferlib/config/ocean/snake.ini b/pufferlib/config/ocean/snake.ini index 3827b0252..9eafe9400 100644 --- a/pufferlib/config/ocean/snake.ini +++ b/pufferlib/config/ocean/snake.ini @@ -6,7 +6,7 @@ policy_name = Snake rnn_name = Recurrent [env] -num_envs = 4 +num_envs = 16 width = 640 height = 360 num_snakes = 256 @@ -18,7 +18,7 @@ reward_corpse = 0.1 reward_death = -1.0 [vec] -num_envs = 16 +num_envs = 1 [train] total_timesteps = 500_000_000 @@ -40,6 +40,9 @@ vf_coef = 3.9655925817980053 vtrace_c_clip = 0 vtrace_rho_clip = 0.9285200248552337 +[sweep] +max_cost = 500 + 
[sweep.env.reward_food] distribution = uniform min = 0.0 @@ -56,7 +59,21 @@ scale = auto [sweep.train.total_timesteps] distribution = log_normal -min = 5e7 -max = 2e8 +min = 2e7 +max = 5e8 mean = 1e8 scale = auto + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 32 +mean = 8 +scale = auto From 4f417cabc86072607c59f7198b9719de4d2d4923 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 6 Oct 2025 00:52:13 +0000 Subject: [PATCH 029/188] Config --- pufferlib/config/default.ini | 27 +++++++++++++++++++++------ pufferlib/config/ocean/pacman.ini | 26 +++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index e75eb195c..a6064164c 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -65,13 +65,28 @@ method = Protein metric = score goal = maximize downsample = 10 +max_cost = 300 -#[sweep.vec.num_envs] -#distribution = uniform_pow2 -#min = 1 -#max = 16 -#mean = 8 -#scale = auto +[sweep.train.total_timesteps] +distribution = log_normal +min = 2e7 +max = 5e8 +mean = 1e8 +scale = auto + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto [sweep.train.bptt_horizon] distribution = uniform_pow2 diff --git a/pufferlib/config/ocean/pacman.ini b/pufferlib/config/ocean/pacman.ini index 45055e79b..07f03517e 100644 --- a/pufferlib/config/ocean/pacman.ini +++ b/pufferlib/config/ocean/pacman.ini @@ -5,7 +5,7 @@ policy_name = Policy rnn_name = Recurrent [vec] -num_envs = 8 +num_envs = 4 [env] num_envs = 1024 @@ -31,3 +31,27 @@ vf_coef = 0.31518694995467555 vtrace_c_clip = 0.30575543665366217 vtrace_rho_clip = 1.5301756939690652 +[sweep] +downsample = 10 +max_cost = 300 + 
+[sweep.train.total_timesteps] +distribution = log_normal +min = 2e7 +max = 5e8 +mean = 1e8 +scale = auto + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 1024 +mean = 128 +scale = auto + +[sweep.env.num_envs] +distribution = uniform_pow2 +min = 1 +max = 4096 +mean = 2048 +scale = auto From 5ab588a4257c22ee6a15261deb251699382a568d Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 00:54:33 +0000 Subject: [PATCH 030/188] Mem fix for puffer moba w/ shared mem close. Add perf metric to pacman --- constellation.py | 4 ++-- pufferlib/ocean/env_binding.h | 8 +++++++ pufferlib/ocean/moba/binding.c | 39 ++++++++++++++++++++++++++++++++ pufferlib/ocean/moba/moba.py | 1 + pufferlib/ocean/pacman/binding.c | 1 + pufferlib/ocean/pacman/pacman.h | 2 ++ pufferlib/pufferl.py | 6 ++--- 7 files changed, 56 insertions(+), 5 deletions(-) diff --git a/constellation.py b/constellation.py index 17eb6c53a..4647667ac 100644 --- a/constellation.py +++ b/constellation.py @@ -19,7 +19,7 @@ PAPER_BG_COLOR = '#061a1a' LINE_WIDTH = 4 LINE_COLORS = ["#0000b3", "#0010d9", "#0020ff", "#0040ff", "#0060ff", "#0080ff", "#009fff", "#00bfff", "#00ffff"][::-1] -roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] +roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet', 'white'] TITLE_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_TITLE, @@ -151,7 +151,7 @@ def cached_sweep_load(path): return data -env_names = ['grid', 'tetris', 'breakout', 'pong'] +env_names = ['grid', 'moba', 'tower_climb', 'tetris', 'breakout', 'pong', 'g2048', 'snake'] EXPERIMENTS = { name: cached_sweep_load(f'experiments/logs/puffer_{name}') for name in env_names diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index f64b6148b..64efe1cf1 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -12,6 +12,13 @@ static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) { } #endif 
+static PyObject* my_shared_close(PyObject* self, PyObject* args); +#ifndef MY_SHARED_CLOSE +static PyObject* my_shared_close(PyObject* self, PyObject* args) { + return 0; +} +#endif + static PyObject* my_get(PyObject* dict, Env* env); #ifndef MY_GET static PyObject* my_get(PyObject* dict, Env* env) { @@ -657,6 +664,7 @@ static PyMethodDef methods[] = { {"vec_render", vec_render, METH_VARARGS, "Render the vector of environments"}, {"vec_close", vec_close, METH_VARARGS, "Close the vector of environments"}, {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"}, + {"shared_close", my_shared_close, METH_VARARGS, "Close shared state"}, MY_METHODS, {NULL, NULL, 0, NULL} }; diff --git a/pufferlib/ocean/moba/binding.c b/pufferlib/ocean/moba/binding.c index c48c5fc9e..bfc652881 100644 --- a/pufferlib/ocean/moba/binding.c +++ b/pufferlib/ocean/moba/binding.c @@ -2,6 +2,7 @@ #define Env MOBA #define MY_SHARED +#define MY_SHARED_CLOSE #include "../env_binding.h" static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) { @@ -22,6 +23,44 @@ static PyObject* my_shared(PyObject* self, PyObject* args, PyObject* kwargs) { return PyLong_FromVoidPtr(state); } +static PyObject* my_shared_close(PyObject* self, PyObject* args) { + PyObject* handle_obj = PyTuple_GetItem(args, 0); + if (!PyObject_TypeCheck(handle_obj, &PyLong_Type)) { + PyErr_SetString(PyExc_TypeError, "state handle must be an integer"); + return NULL; + } + + PyObject* state_dict = (PyObject*)PyLong_AsVoidPtr(handle_obj); + + PyObject* ai_path_buffer_handle = PyDict_GetItemString(state_dict, "ai_path_buffer"); + if (ai_path_buffer_handle == NULL) { + PyErr_SetString(PyExc_KeyError, "Key 'ai_path_buffer' not found in state"); + return NULL; + } + int* ai_path_buffer = (int*)PyLong_AsVoidPtr(ai_path_buffer_handle); + free(ai_path_buffer); + + PyObject* ai_paths_handle = PyDict_GetItemString(state_dict, "ai_paths"); + if (ai_paths_handle == NULL) { + 
PyErr_SetString(PyExc_KeyError, "Key 'ai_paths' not found in state"); + return NULL; + } + unsigned char* ai_paths = (unsigned char*)PyLong_AsVoidPtr(ai_paths_handle); + free(ai_paths); + + PyObject* game_map_handle = PyDict_GetItemString(state_dict, "game_map"); + if (game_map_handle == NULL) { + PyErr_SetString(PyExc_KeyError, "Key 'game_map' not found in state"); + return NULL; + } + unsigned char* game_map = (unsigned char*)PyLong_AsVoidPtr(game_map_handle); + free(game_map); + + Py_INCREF(Py_None); + return Py_None; +} + + static int my_init(Env* env, PyObject* args, PyObject* kwargs) { env->vision_range = unpack(kwargs, "vision_range"); env->agent_speed = unpack(kwargs, "agent_speed"); diff --git a/pufferlib/ocean/moba/moba.py b/pufferlib/ocean/moba/moba.py index 4b4dd0be8..362e76e53 100644 --- a/pufferlib/ocean/moba/moba.py +++ b/pufferlib/ocean/moba/moba.py @@ -78,6 +78,7 @@ def render(self): binding.vec_render(self.c_envs, 0) def close(self): + binding.shared_close(self.c_state) binding.vec_close(self.c_envs) diff --git a/pufferlib/ocean/pacman/binding.c b/pufferlib/ocean/pacman/binding.c index f9f8c1c81..6bcf45e49 100644 --- a/pufferlib/ocean/pacman/binding.c +++ b/pufferlib/ocean/pacman/binding.c @@ -17,6 +17,7 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { static int my_log(PyObject* dict, Log* log) { assign_to_dict(dict, "score", log->score); + assign_to_dict(dict, "perf", log->perf); assign_to_dict(dict, "episode_return", log->episode_return); assign_to_dict(dict, "episode_length", log->episode_length); return 0; diff --git a/pufferlib/ocean/pacman/pacman.h b/pufferlib/ocean/pacman/pacman.h index b00c7002c..a1f29ff45 100644 --- a/pufferlib/ocean/pacman/pacman.h +++ b/pufferlib/ocean/pacman/pacman.h @@ -32,6 +32,7 @@ struct Log { float episode_return; float episode_length; float score; + float perf; float n; }; @@ -153,6 +154,7 @@ typedef struct PacmanEnv { void add_log(PacmanEnv *env) { env->log.score += env->score; + 
env->log.perf += (float)env->score / NUM_DOTS; env->log.episode_return += env->score; env->log.episode_length = env->step_count; env->log.n++; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f65880c3c..e86b455bc 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -188,7 +188,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Dashboard self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) - #self.print_dashboard(clear=True) + self.print_dashboard(clear=True) @property def uptime(self): @@ -434,7 +434,7 @@ def train(self): if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() self.losses = losses - #self.print_dashboard() + self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step @@ -953,7 +953,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): if logs is not None: all_logs.append(logs) - #pufferl.print_dashboard() + pufferl.print_dashboard() model_path = pufferl.close() pufferl.logger.log_cost(uptime) pufferl.logger.close(model_path) From 6260d3293ff7718a270e0d4ea70346826a52f9a4 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 00:56:51 +0000 Subject: [PATCH 031/188] comment dash --- pufferlib/pufferl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index e86b455bc..f65880c3c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -188,7 +188,7 @@ def __init__(self, config, vecenv, policy, logger=None): # Dashboard self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) - self.print_dashboard(clear=True) + #self.print_dashboard(clear=True) @property def uptime(self): @@ -434,7 +434,7 @@ def train(self): if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() self.losses = 
losses - self.print_dashboard() + #self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step @@ -953,7 +953,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): if logs is not None: all_logs.append(logs) - pufferl.print_dashboard() + #pufferl.print_dashboard() model_path = pufferl.close() pufferl.logger.log_cost(uptime) pufferl.logger.close(model_path) From 11b0ca7493e4235b0159b87a686a190eab33348b Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 14:48:37 +0000 Subject: [PATCH 032/188] triple triad config; verbose option in pufferl --- pufferlib/config/ocean/tripletriad.ini | 6 +++--- pufferlib/pufferl.py | 18 +++++++++++------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pufferlib/config/ocean/tripletriad.ini b/pufferlib/config/ocean/tripletriad.ini index aae55d096..4d4a1ffd8 100644 --- a/pufferlib/config/ocean/tripletriad.ini +++ b/pufferlib/config/ocean/tripletriad.ini @@ -8,14 +8,14 @@ rnn_name = Recurrent num_envs = 1024 [vec] -num_envs = 8 +num_envs = 4 [train] total_timesteps = 100_000_000 [sweep.train.total_timesteps] distribution = log_normal -min = 5e7 +min = 1e7 max = 2e8 mean = 1e8 -scale = 0.25 +scale = time diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f65880c3c..dd6e448e3 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -50,7 +50,7 @@ ADVANTAGE_CUDA = shutil.which("nvcc") is not None class PuffeRL: - def __init__(self, config, vecenv, policy, logger=None): + def __init__(self, config, vecenv, policy, logger=None, verbose=True): # Backend perf optimization torch.set_float32_matmul_precision('high') torch.backends.cudnn.deterministic = config['torch_deterministic'] @@ -185,10 +185,11 @@ def __init__(self, config, vecenv, policy, logger=None): self.stats = defaultdict(list) self.last_stats = defaultdict(list) self.losses = {} + self.verbose = verbose # Dashboard self.model_size = sum(p.numel() for 
p in policy.parameters() if p.requires_grad) - #self.print_dashboard(clear=True) + self.print_dashboard(clear=True) @property def uptime(self): @@ -434,7 +435,7 @@ def train(self): if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() self.losses = losses - #self.print_dashboard() + self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step @@ -526,6 +527,9 @@ def save_checkpoint(self): def print_dashboard(self, clear=False, idx=[0], c1='[cyan]', c2='[white]', b1='[bright_cyan]', b2='[bright_white]'): + if not self.verbose: + return + config = self.config sps = dist_sum(self.sps, config['device']) agent_steps = dist_sum(self.global_step, config['device']) @@ -880,7 +884,7 @@ def download(self): model_file = max(os.listdir(data_dir)) return f'{data_dir}/{model_file}' -def train(env_name, args=None, vecenv=None, policy=None, logger=None): +def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=True): args = args or load_config(env_name) # Assume TorchRun DDP is used if LOCAL_RANK is set @@ -917,7 +921,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): logger = WandbLogger(args) train_config = dict(**args['train'], env=env_name) - pufferl = PuffeRL(train_config, vecenv, policy, logger) + pufferl = PuffeRL(train_config, vecenv, policy, logger, verbose) pufferl.logger.init(args) all_logs = [] @@ -953,7 +957,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None): if logs is not None: all_logs.append(logs) - #pufferl.print_dashboard() + pufferl.print_dashboard() model_path = pufferl.close() pufferl.logger.log_cost(uptime) pufferl.logger.close(model_path) @@ -1026,7 +1030,7 @@ def _sweep_worker(env_name, q_host, q_worker, device): np.random.seed(seed) torch.manual_seed(seed) try: - all_logs = train(env_name, args=args) + all_logs = train(env_name, args=args, verbose=False) except 
Exception: import traceback traceback.print_exc() From 3a0ff8faab2d6c4976a409908359ee71c2057f6b Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 11:00:41 -0400 Subject: [PATCH 033/188] plot c --- pufferlib/ocean/plot/plot.c | 105 +++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 32 deletions(-) diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c index d64999f80..9bd6d7baf 100644 --- a/pufferlib/ocean/plot/plot.c +++ b/pufferlib/ocean/plot/plot.c @@ -8,6 +8,68 @@ const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; +typedef struct PlotArgs { + int title_font_size; + int axis_font_size; + int axis_tick_font_size; + int legend_font_size; + int line_width; + int margin; + Color font_color; + Color background_color; + Color axis_color; +} PlotArgs; + +PlotArgs DEFAULT_PLOT_ARGS = { + .title_font_size = 24, + .axis_font_size = 16, + .axis_tick_font_size = 12, + .legend_font_size = 12, + .line_width = 2, + .margin = 50, + .font_color = PUFF_WHITE, + .background_color = PUFF_BACKGROUND, + .axis_color = PUFF_WHITE, +}; + +void plot(float* x, float* y, int num_points, PlotArgs args) { + int width = GetScreenWidth(); + int height = GetScreenHeight(); + + // Draw axes + DrawLine(args.margin, args.margin, + args.margin, height - args.margin, PUFF_WHITE); + DrawLine(args.margin, height - args.margin, + width - args.margin, height - args.margin, PUFF_WHITE); + + // Find min/max for scaling + float min_x = x[0], max_x = x[0], min_y = y[0], max_y = y[0]; + for (int j = 1; j < num_points; j++) { + if (x[j] < min_x) min_x = x[j]; + if (x[j] > max_x) max_x = x[j]; + if (y[j] < min_y) min_y = y[j]; + if (y[j] > max_y) max_y = y[j]; + } + float dx = max_x - min_x; + float dy = max_y - min_y; + if (dx == 0) dx = 1.0f; + if (dy == 0) dy = 1.0f; + min_x -= 0.1f * dx; max_x += 0.1f * dx; + min_y -= 0.1f * dy; max_y += 0.1f * dy; 
+ dx = max_x - min_x; + dy = max_y - min_y; + + // Plot lines + for (int j = 0; j < num_points - 1; j++) { + float x1 = args.margin + (x[j] - min_x) / dx * (width - 2*args.margin); + float y1 = (height - args.margin) - (y[j] - min_y) / dy * (height - 2*args.margin); + float x2 = args.margin + (x[j + 1] - min_x) / dx * (width - 2*args.margin); + float y2 = (height - args.margin) - (y[j + 1] - min_y) / dy * (height - 2*args.margin); + DrawLine(x1, y1, x2, y2, PUFF_CYAN); + } +} + + int main(void) { // Read CSV file FILE *fp = fopen("pufferlib/ocean/plot/data.csv", "r"); @@ -49,23 +111,6 @@ int main(void) { } fclose(fp); - // Find min/max for scaling - float min_x = x[0], max_x = x[0], min_y = y[0], max_y = y[0]; - for (int j = 1; j < num_points; j++) { - if (x[j] < min_x) min_x = x[j]; - if (x[j] > max_x) max_x = x[j]; - if (y[j] < min_y) min_y = y[j]; - if (y[j] > max_y) max_y = y[j]; - } - float dx = max_x - min_x; - float dy = max_y - min_y; - if (dx == 0) dx = 1.0f; - if (dy == 0) dy = 1.0f; - min_x -= 0.1f * dx; max_x += 0.1f * dx; - min_y -= 0.1f * dy; max_y += 0.1f * dy; - dx = max_x - min_x; - dy = max_y - min_y; - // Initialize Raylib const int screenWidth = 800; const int screenHeight = 600; @@ -73,23 +118,19 @@ int main(void) { InitWindow(screenWidth, screenHeight, "CSV Data Plot"); SetTargetFPS(60); + RenderTexture2D fig = LoadRenderTexture(screenWidth, screenHeight); + while (!WindowShouldClose()) { - BeginDrawing(); + BeginTextureMode(fig); ClearBackground(PUFF_BACKGROUND); - - // Draw axes - DrawLine(margin, margin, margin, screenHeight - margin, PUFF_WHITE); // Y-axis - DrawLine(margin, screenHeight - margin, screenWidth - margin, screenHeight - margin, PUFF_WHITE); // X-axis - - // Plot lines - for (int j = 0; j < num_points - 1; j++) { - float px1 = margin + (x[j] - min_x) / dx * (screenWidth - 2 * margin); - float py1 = (screenHeight - margin) - (y[j] - min_y) / dy * (screenHeight - 2 * margin); - float px2 = margin + (x[j + 1] - min_x) / dx * 
(screenWidth - 2 * margin); - float py2 = (screenHeight - margin) - (y[j + 1] - min_y) / dy * (screenHeight - 2 * margin); - DrawLine(px1, py1, px2, py2, PUFF_CYAN); - } - + plot(x, y, num_points, DEFAULT_PLOT_ARGS); + EndTextureMode(); + BeginDrawing(); + DrawTextureRec( + fig.texture, + (Rectangle){ 0, 0, fig.texture.width, -fig.texture.height }, + (Vector2){ 0, 0 }, WHITE + ); EndDrawing(); } From a06fa9cdfa276968b3d8c1c420dbb76bba6da6d8 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 19:41:35 +0000 Subject: [PATCH 034/188] Initial plot dash --- pufferlib/ocean/plot/plot.c | 507 +++++++++++++++++++++++++++++++----- 1 file changed, 444 insertions(+), 63 deletions(-) diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c index 9bd6d7baf..488614e37 100644 --- a/pufferlib/ocean/plot/plot.c +++ b/pufferlib/ocean/plot/plot.c @@ -3,38 +3,98 @@ #include #include "raylib.h" +#define RAYGUI_IMPLEMENTATION +#include "raygui.h" + +#include "cJSON.h" + const Color PUFF_RED = (Color){187, 0, 0, 255}; const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; +const float EMPTY = -4242.0f; + +#define SEP 4 +#define SETTINGS_HEIGHT 20 +#define DROPDOWN_WIDTH 200 + typedef struct PlotArgs { + float x_min; + float x_max; + float y_min; + float y_max; + float z_min; + float z_max; + int width; + int height; int title_font_size; int axis_font_size; int axis_tick_font_size; int legend_font_size; int line_width; + int tick_length; int margin; Color font_color; Color background_color; Color axis_color; + char* x_label; + char* y_label; + Font font; } PlotArgs; PlotArgs DEFAULT_PLOT_ARGS = { + .x_min = EMPTY, + .x_max = EMPTY, + .y_min = EMPTY, + .y_max = EMPTY, + .z_min = EMPTY, + .z_max = EMPTY, + .width = 960, + .height = 540 - SETTINGS_HEIGHT, .title_font_size = 24, - .axis_font_size = 16, + .axis_font_size = 20, .axis_tick_font_size = 12, 
.legend_font_size = 12, .line_width = 2, - .margin = 50, + .tick_length = 8, + .margin = 70, .font_color = PUFF_WHITE, .background_color = PUFF_BACKGROUND, .axis_color = PUFF_WHITE, + .x_label = "Cost", + .y_label = "Score", }; -void plot(float* x, float* y, int num_points, PlotArgs args) { - int width = GetScreenWidth(); - int height = GetScreenHeight(); +#include +#include +#include + +const char* format_tick_label(double value) { + static char buffer[32]; + int precision = 2; + + if (fabs(value) < 1e-10) { + strcpy(buffer, "0"); + return buffer; + } + + if (fabs(value) < 0.01 || fabs(value) > 10000) { + snprintf(buffer, sizeof(buffer), "%.2e", value); + } else { + snprintf(buffer, sizeof(buffer), "%.*f", precision, value); + + char *end = buffer + strlen(buffer) - 1; + while (end > buffer && *end == '0') *end-- = '\0'; + if (end > buffer && *end == '.') *end = '\0'; + } + + return buffer; +} + +void draw_axes(PlotArgs args) { + int width = args.width; + int height = args.height; // Draw axes DrawLine(args.margin, args.margin, @@ -42,100 +102,421 @@ void plot(float* x, float* y, int num_points, PlotArgs args) { DrawLine(args.margin, height - args.margin, width - args.margin, height - args.margin, PUFF_WHITE); + // X label + Vector2 x_font_size = MeasureTextEx(args.font, args.x_label, args.axis_font_size, 0); + DrawText( + args.x_label, + width/2 - x_font_size.x/2, + height - x_font_size.y, + args.axis_font_size, + PUFF_WHITE + ); + + // Y label + Vector2 y_font_size = MeasureTextEx(args.font, args.y_label, args.axis_font_size, 0); + DrawTextPro( + args.font, + args.y_label, + (Vector2){ + 0, + height/2 + y_font_size.x/2 + }, + (Vector2){ 0, 0 }, + -90, + args.axis_font_size, + 0, + PUFF_WHITE + ); + + // Autofit number of ticks + Vector2 tick_label_size = MeasureTextEx(args.font, "estimate", args.axis_font_size, 0); + int num_x_ticks = (width - 2*args.margin)/tick_label_size.x; + int num_y_ticks = (height - 2*args.margin)/tick_label_size.x; + + // X ticks + for 
(int i=0; i max) max = ary[i]; + } + return max; +} + +void plot(float* x, float* y, int num_points, PlotArgs args) { + int width = args.width; + int height = args.height; + // Find min/max for scaling - float min_x = x[0], max_x = x[0], min_y = y[0], max_y = y[0]; - for (int j = 1; j < num_points; j++) { - if (x[j] < min_x) min_x = x[j]; - if (x[j] > max_x) max_x = x[j]; - if (y[j] < min_y) min_y = y[j]; - if (y[j] > max_y) max_y = y[j]; - } - float dx = max_x - min_x; - float dy = max_y - min_y; + //float z_min = args.z_min == EMPTY ? ary_min(z, num_points) : args.z_min; + //float z_max = args.z_max == EMPTY ? ary_max(z, num_points) : args.z_max; + + float x_min = args.x_min; + float x_max = args.x_max; + float y_min = args.y_min; + float y_max = args.y_max; + + float dx = x_max - x_min; + float dy = y_max - y_min; if (dx == 0) dx = 1.0f; if (dy == 0) dy = 1.0f; - min_x -= 0.1f * dx; max_x += 0.1f * dx; - min_y -= 0.1f * dy; max_y += 0.1f * dy; - dx = max_x - min_x; - dy = max_y - min_y; + x_min -= 0.1f * dx; x_max += 0.1f * dx; + y_min -= 0.1f * dy; y_max += 0.1f * dy; + dx = x_max - x_min; + dy = y_max - y_min; // Plot lines for (int j = 0; j < num_points - 1; j++) { - float x1 = args.margin + (x[j] - min_x) / dx * (width - 2*args.margin); - float y1 = (height - args.margin) - (y[j] - min_y) / dy * (height - 2*args.margin); - float x2 = args.margin + (x[j + 1] - min_x) / dx * (width - 2*args.margin); - float y2 = (height - args.margin) - (y[j + 1] - min_y) / dy * (height - 2*args.margin); + float x1 = args.margin + (x[j] - x_min) / dx * (width - 2*args.margin); + float y1 = (height - args.margin) - (y[j] - y_min) / dy * (height - 2*args.margin); + /* + float x2 = args.margin + (x[j + 1] - x_min) / dx * (width - 2*args.margin); + float y2 = (height - args.margin) - (y[j + 1] - y_min) / dy * (height - 2*args.margin); DrawLine(x1, y1, x2, y2, PUFF_CYAN); + */ + DrawCircle(x1, y1, args.line_width, PUFF_CYAN); } } +typedef struct { + char *key; + float *values; + 
int size; +} KeyValue; + +float* get_values(KeyValue *map, int map_count, char *search_key, int *out_size) { + for (int i = 0; i < map_count; i++) { + if (map[i].key && strcmp(map[i].key, search_key) == 0) { + *out_size = map[i].size; + return map[i].values; + } + } + return NULL; +} + +int cleanup(KeyValue *map, int map_count, cJSON *root, char *json_str) { + if (map) { + for (int i = 0; i < map_count; i++) { + if (map[i].key) free(map[i].key); + if (map[i].values) free(map[i].values); + } + free(map); + } + if (root) cJSON_Delete(root); + if (json_str) free(json_str); + return 1; +} int main(void) { - // Read CSV file - FILE *fp = fopen("pufferlib/ocean/plot/data.csv", "r"); - if (!fp) { - printf("Failed to open data.csv\n"); + FILE *file = fopen("pufferlib/ocean/plot/data.json", "r"); + if (!file) { + printf("Error opening file\n"); return 1; } - // Skip header line - char line[1024]; - if (!fgets(line, sizeof(line), fp)) { - printf("Failed to read header\n"); - fclose(fp); + fseek(file, 0, SEEK_END); + long file_size = ftell(file); + fseek(file, 0, SEEK_SET); + char *json_str = malloc(file_size + 1); + if (!json_str) { + printf("Memory allocation error\n"); + fclose(file); return 1; } - // Count lines for number of points - int num_points = 0; - long file_pos = ftell(fp); - while (fgets(line, sizeof(line), fp)) num_points++; - rewind(fp); - fseek(fp, file_pos, SEEK_SET); // Reset to after header + // Read file into buffer + fread(json_str, 1, file_size, file); + json_str[file_size] = '\0'; + fclose(file); - if (num_points == 0) { - printf("No data points\n"); - fclose(fp); + cJSON *root = cJSON_Parse(json_str); + if (!root) { + printf("JSON parse error: %s\n", cJSON_GetErrorPtr()); + free(json_str); return 1; } - float *x = malloc(num_points * sizeof(float)); - float *y = malloc(num_points * sizeof(float)); - int i = 0; - while (fgets(line, sizeof(line), fp)) { - char *token = strtok(line, ","); - if (token) x[i] = atof(token); - token = strtok(NULL, ","); - if 
(token) y[i] = atof(token); - i++; + if (!cJSON_IsObject(root)) { + printf("Error: Root is not an object\n"); + return cleanup(NULL, 0, root, json_str); + } + + int map_count = 0; + cJSON *item = root->child; + while (item) { + map_count++; + item = item->next; + } + KeyValue *map = calloc(map_count, sizeof(KeyValue)); + if (!map) { + printf("Memory allocation error\n"); + return cleanup(NULL, 0, root, json_str); } - fclose(fp); + // Load all keys and their float arrays + int idx = 0; + item = root->child; + while (item) { + map[idx].key = strdup(item->string); + if (!map[idx].key) { + printf("Memory allocation error for key\n"); + return cleanup(map, map_count, root, json_str); + } + + if (!cJSON_IsArray(item)) { + printf("Error: Value for key '%s' is not an array\n", map[idx].key); + return cleanup(map, map_count, root, json_str); + } + + int array_size = cJSON_GetArraySize(item); + map[idx].values = malloc(array_size * sizeof(float)); + if (!map[idx].values) { + printf("Memory allocation error for values\n"); + return cleanup(map, map_count, root, json_str); + } + + map[idx].size = array_size; + + for (int j = 0; j < array_size; j++) { + cJSON *sub = cJSON_GetArrayItem(item, j); + if (cJSON_IsNumber(sub)) { + map[idx].values[j] = (float)sub->valuedouble; + } else { + continue; + printf("Error: Non-number in array for key '%s' at index %d\n", map[idx].key, j); + return cleanup(map, map_count, root, json_str); + } + } + + idx++; + item = item->next; + } + + + // Example usage: Print the arrays + // Cleanup + //free(cost_array); + //free(score_array); + //cJSON_Delete(root); + //free(json_str); + + //float *x = cost_array; + //float *y = score_array; + //float num_points = cost_size; + + //float *x = malloc(num_points * sizeof(float)); + //float *y = malloc(num_points * sizeof(float)); + + // Initialize Raylib - const int screenWidth = 800; - const int screenHeight = 600; - const int margin = 50; - InitWindow(screenWidth, screenHeight, "CSV Data Plot"); + + 
InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); + ClearBackground(PUFF_BACKGROUND); SetTargetFPS(60); - RenderTexture2D fig = LoadRenderTexture(screenWidth, screenHeight); + + PlotArgs args1 = DEFAULT_PLOT_ARGS; + args1.font = GetFontDefault(); + RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); + bool fig1_x_active = false; + int fig1_x_idx = 1; + bool fig1_y_active = false; + int fig1_y_idx = 0; + + PlotArgs args2 = DEFAULT_PLOT_ARGS; + args2.font = GetFontDefault(); + RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); + bool fig2_x_active = false; + int fig2_x_idx = 2; + bool fig2_y_active = false; + int fig2_y_idx = 0; + + PlotArgs args3 = DEFAULT_PLOT_ARGS; + args3.font = GetFontDefault(); + RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); + bool fig3_x_active = false; + int fig3_x_idx = 3; + bool fig3_y_active = false; + int fig3_y_idx = 0; + + PlotArgs args4 = DEFAULT_PLOT_ARGS; + args4.font = GetFontDefault(); + RenderTexture2D fig4 = LoadRenderTexture(args4.width, args4.height); + bool fig4_x_active = false; + int fig4_x_idx = 4; + bool fig4_y_active = false; + int fig4_y_idx = 0; + + char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; + char options[] = "environment/score;cost;train/learning_rate;train/gamma;train/gae_lambda"; + + float* x; + float* y; + int num_points; while (!WindowShouldClose()) { - BeginTextureMode(fig); + BeginDrawing(); + ClearBackground(PUFF_BACKGROUND); + + x = get_values(map, map_count, items[fig1_x_idx], &num_points); + y = get_values(map, map_count, items[fig1_y_idx], &num_points); + args1.x_min = ary_min(x, num_points); + args1.x_max = ary_max(x, num_points); + args1.y_min = ary_min(y, num_points); + args1.y_max = ary_max(y, num_points); + BeginTextureMode(fig1); ClearBackground(PUFF_BACKGROUND); - plot(x, y, num_points, DEFAULT_PLOT_ARGS); + plot(x, y, 
num_points, args1); + draw_axes(args1); EndTextureMode(); - BeginDrawing(); DrawTextureRec( - fig.texture, - (Rectangle){ 0, 0, fig.texture.width, -fig.texture.height }, - (Vector2){ 0, 0 }, WHITE + fig1.texture, + (Rectangle){ 0, 0, fig1.texture.width, -fig1.texture.height }, + (Vector2){ 0, SETTINGS_HEIGHT }, WHITE ); + Rectangle fig1_x_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_x_rect, options, &fig1_x_idx, fig1_x_active)){ + fig1_x_active = !fig1_x_active; + } + Rectangle fig1_y_rect = {DROPDOWN_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_y_rect, options, &fig1_y_idx, fig1_y_active)){ + fig1_y_active = !fig1_y_active; + } + + x = get_values(map, map_count, items[fig2_x_idx], &num_points); + y = get_values(map, map_count, items[fig2_y_idx], &num_points); + args2.x_min = ary_min(x, num_points); + args2.x_max = ary_max(x, num_points); + args2.y_min = ary_min(y, num_points); + args2.y_max = ary_max(y, num_points); + BeginTextureMode(fig2); + ClearBackground(PUFF_BACKGROUND); + plot(x, y, num_points, args2); + draw_axes(args2); + EndTextureMode(); + DrawTextureRec( + fig2.texture, + (Rectangle){0, 0, fig2.texture.width, -fig2.texture.height }, + (Vector2){ fig1.texture.width, SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig2_x_rect = {fig1.texture.width, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig2_x_rect, options, &fig2_x_idx, fig2_x_active)){ + fig2_x_active = !fig2_x_active; + } + Rectangle fig2_y_rect = {fig1.texture.width + DROPDOWN_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig2_y_rect, options, &fig2_y_idx, fig2_y_active)){ + fig2_y_active = !fig2_y_active; + } + + x = get_values(map, map_count, items[fig3_x_idx], &num_points); + y = get_values(map, map_count, items[fig3_y_idx], &num_points); + args3.x_min = ary_min(x, num_points); + args3.x_max = ary_max(x, num_points); + args3.y_min = ary_min(y, num_points); + args3.y_max = ary_max(y, num_points); + 
BeginTextureMode(fig3); + ClearBackground(PUFF_BACKGROUND); + plot(x, y, num_points, args3); + draw_axes(args3); + EndTextureMode(); + DrawTextureRec( + fig3.texture, + (Rectangle){ 0, 0, fig3.texture.width, -fig3.texture.height }, + (Vector2){ 0, fig1.texture.height + 2*SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig3_x_rect = {0, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig3_x_rect, options, &fig3_x_idx, fig3_x_active)){ + fig3_x_active = !fig3_x_active; + } + Rectangle fig3_y_rect = {DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig3_y_rect, options, &fig3_y_idx, fig3_y_active)){ + fig3_y_active = !fig3_y_active; + } + + x = get_values(map, map_count, items[fig4_x_idx], &num_points); + y = get_values(map, map_count, items[fig4_y_idx], &num_points); + args4.x_min = ary_min(x, num_points); + args4.x_max = ary_max(x, num_points); + args4.y_min = ary_min(y, num_points); + args4.y_max = ary_max(y, num_points); + BeginTextureMode(fig4); + ClearBackground(PUFF_BACKGROUND); + plot(x, y, num_points, args4); + draw_axes(args4); + EndTextureMode(); + DrawTextureRec( + fig4.texture, + (Rectangle){ 0, 0, fig4.texture.width, -fig4.texture.height }, + (Vector2){ fig1.texture.width, fig1.texture.height + 2*SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig4_x_rect = {fig1.texture.width, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig4_x_rect, options, &fig4_x_idx, fig4_x_active)){ + fig4_x_active = !fig4_x_active; + } + Rectangle fig4_y_rect = {fig1.texture.width + DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig4_y_rect, options, &fig4_y_idx, fig4_y_active)){ + fig4_y_active = !fig4_y_active; + } + + DrawFPS(GetScreenWidth() - 95, 10); EndDrawing(); } - free(x); - free(y); + //free(x); + //free(y); CloseWindow(); return 0; } From 
5ca627f58561ecc7a5d8210199d94c95dd358a53 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 19:55:05 +0000 Subject: [PATCH 035/188] fixes --- pufferlib/ocean/plot/plot.c | 65 +++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c index 488614e37..074ee5038 100644 --- a/pufferlib/ocean/plot/plot.c +++ b/pufferlib/ocean/plot/plot.c @@ -345,6 +345,33 @@ int main(void) { item = item->next; } + // Create items as an array of strings + char **items = malloc(map_count * sizeof(char *)); + if (!items) { + printf("Memory allocation error\n"); + return cleanup(map, map_count, root, json_str); + } + for (int i = 0; i < map_count; i++) { + items[i] = map[i].key; // Or strdup if you need copies + } + + // Create options as a semicolon-separated string + size_t options_len = 0; + for (int i = 0; i < map_count; i++) { + options_len += strlen(map[i].key) + 1; // +1 for semicolon or null + } + char *options = malloc(options_len); + if (!options) { + printf("Memory allocation error\n"); + free(items); + return cleanup(map, map_count, root, json_str); + } + options[0] = '\0'; + for (int i = 0; i < map_count; i++) { + if (i > 0) strcat(options, ";"); + strcat(options, map[i].key); + } + // Example usage: Print the arrays // Cleanup @@ -400,19 +427,25 @@ int main(void) { bool fig4_y_active = false; int fig4_y_idx = 0; - char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; - char options[] = "environment/score;cost;train/learning_rate;train/gamma;train/gae_lambda"; + //char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; + //char options[] = "environment/score;cost;train/learning_rate;train/gamma;train/gae_lambda"; float* x; float* y; int num_points; + char* x_label; + char* y_label; while (!WindowShouldClose()) { BeginDrawing(); ClearBackground(PUFF_BACKGROUND); - x 
= get_values(map, map_count, items[fig1_x_idx], &num_points); - y = get_values(map, map_count, items[fig1_y_idx], &num_points); + x_label = items[fig1_x_idx]; + y_label = items[fig1_y_idx]; + args1.x_label = x_label; + args1.y_label = y_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); args1.x_min = ary_min(x, num_points); args1.x_max = ary_max(x, num_points); args1.y_min = ary_min(y, num_points); @@ -436,8 +469,12 @@ int main(void) { fig1_y_active = !fig1_y_active; } - x = get_values(map, map_count, items[fig2_x_idx], &num_points); - y = get_values(map, map_count, items[fig2_y_idx], &num_points); + x_label = items[fig2_x_idx]; + y_label = items[fig2_y_idx]; + args2.x_label = x_label; + args2.y_label = y_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); args2.x_min = ary_min(x, num_points); args2.x_max = ary_max(x, num_points); args2.y_min = ary_min(y, num_points); @@ -461,8 +498,12 @@ int main(void) { fig2_y_active = !fig2_y_active; } - x = get_values(map, map_count, items[fig3_x_idx], &num_points); - y = get_values(map, map_count, items[fig3_y_idx], &num_points); + x_label = items[fig3_x_idx]; + y_label = items[fig3_y_idx]; + args3.x_label = x_label; + args3.y_label = y_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); args3.x_min = ary_min(x, num_points); args3.x_max = ary_max(x, num_points); args3.y_min = ary_min(y, num_points); @@ -486,8 +527,12 @@ int main(void) { fig3_y_active = !fig3_y_active; } - x = get_values(map, map_count, items[fig4_x_idx], &num_points); - y = get_values(map, map_count, items[fig4_y_idx], &num_points); + x_label = items[fig4_x_idx]; + y_label = items[fig4_y_idx]; + args4.x_label = x_label; + args4.y_label = y_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); 
args4.x_min = ary_min(x, num_points); args4.x_max = ary_max(x, num_points); args4.y_min = ary_min(y, num_points); From d351aa41630bbf581b3398fe2d2aa215e526c36d Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 20:00:34 +0000 Subject: [PATCH 036/188] Temp --- constellation.py | 4 ++-- scripts/build_ocean.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/constellation.py b/constellation.py index 4647667ac..8177d59ec 100644 --- a/constellation.py +++ b/constellation.py @@ -19,7 +19,7 @@ PAPER_BG_COLOR = '#061a1a' LINE_WIDTH = 4 LINE_COLORS = ["#0000b3", "#0010d9", "#0020ff", "#0040ff", "#0060ff", "#0080ff", "#009fff", "#00bfff", "#00ffff"][::-1] -roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet', 'white'] +roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet', 'white', 'gray'] TITLE_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_TITLE, @@ -151,7 +151,7 @@ def cached_sweep_load(path): return data -env_names = ['grid', 'moba', 'tower_climb', 'tetris', 'breakout', 'pong', 'g2048', 'snake'] +env_names = ['grid', 'moba', 'tower_climb', 'tetris', 'breakout', 'pong', 'g2048', 'snake', 'pacman'] EXPERIMENTS = { name: cached_sweep_load(f'experiments/logs/puffer_{name}') for name in env_names diff --git a/scripts/build_ocean.sh b/scripts/build_ocean.sh index 88909d44f..e854b1f74 100755 --- a/scripts/build_ocean.sh +++ b/scripts/build_ocean.sh @@ -70,7 +70,7 @@ FLAGS=( -I./$BOX2D_NAME/include -I./$BOX2D_NAME/src -I./pufferlib/extensions - "$SRC_DIR/$ENV.c" -o "$ENV" + "$SRC_DIR/cJSON.c" "$SRC_DIR/$ENV.c" -o "$ENV" $LINK_ARCHIVES -lm -lpthread From f54055671e282fa41cc32facc837dc0ce7e12994 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 20:02:45 +0000 Subject: [PATCH 037/188] cogames --- pufferlib/config/cogames.ini | 85 +++++++++++ pufferlib/environments/cogames/__init__.py | 12 ++ pufferlib/environments/cogames/environment.py | 42 ++++++ pufferlib/environments/cogames/torch.py 
| 134 ++++++++++++++++++ pufferlib/pufferl.py | 6 + 5 files changed, 279 insertions(+) create mode 100644 pufferlib/config/cogames.ini create mode 100644 pufferlib/environments/cogames/__init__.py create mode 100644 pufferlib/environments/cogames/environment.py create mode 100644 pufferlib/environments/cogames/torch.py diff --git a/pufferlib/config/cogames.ini b/pufferlib/config/cogames.ini new file mode 100644 index 000000000..19e2eff39 --- /dev/null +++ b/pufferlib/config/cogames.ini @@ -0,0 +1,85 @@ +[base] +package = cogames +env_name = assembler_2_complex machina_1 training_facility_1 assembler_1_simple +policy_name = Policy +rnn_name = Recurrent + +[vec] +num_envs = 1024 +num_workers = 16 + +[env] +render_mode = auto + +[train] +total_timesteps = 300_000_000 +batch_size = auto + +adam_beta1 = 0.8923106632311335 +adam_beta2 = 0.9632470625784862 +adam_eps = 1.3537431449843922e-7 +clip_coef = 0.14919147162017737 +ent_coef = 0.016700174334611493 +gae_lambda = 0.8443676864928215 +gamma = 0.997950174315581 +learning_rate = 0.018470110879570414 +max_grad_norm = 2.572849891206465 +minibatch_size = 32768 +bptt_horizon = 64 +prio_alpha = 0.7918451491719373 +prio_beta0 = 0.5852686803034238 +vf_clip_coef = 0.1569624916309049 +vf_coef = 3.2211333828684454 +vtrace_c_clip = 2.134490283650365 +vtrace_rho_clip = 2.296343917695581 + + +#adam_beta1 = 0.8923106632311335 +#adam_beta2 = 0.9632470625784862 +#adam_eps = 1.3537431449843922e-7 +#clip_coef = 0.14919147162017737 +#ent_coef = 0.016700174334611493 +#gae_lambda = 0.8443676864928215 +#gamma = 0.997950174315581 +#learning_rate = 0.018470110879570414 +#max_grad_norm = 2.572849891206465 +#minibatch_size = 32768 +#bptt_horizon = 64 +#prio_alpha = 0.7918451491719373 +#prio_beta0 = 0.5852686803034238 +#vf_clip_coef = 0.1569624916309049 +#vf_coef = 3.2211333828684454 +#vtrace_c_clip = 2.134490283650365 +#vtrace_rho_clip = 2.296343917695581 + +[sweep] +metric = agent/heart.gained +max_cost = 500 + +[sweep.train.total_timesteps] 
+distribution = log_normal +min = 1e8 +max = 5e8 +mean = 3e8 +scale = auto + +[sweep.env.ore_reward] +distribution = uniform +min = 0.0 +mean = 0.25 +max = 1.0 +scale = auto + +[sweep.env.battery_reward] +distribution = uniform +min = 0.0 +mean = 0.5 +max = 1.0 +scale = auto + +[sweep.env.heart_reward] +distribution = uniform +min = 0.0 +mean = 1.0 +max = 1.0 +scale = auto diff --git a/pufferlib/environments/cogames/__init__.py b/pufferlib/environments/cogames/__init__.py new file mode 100644 index 000000000..59cda9e7c --- /dev/null +++ b/pufferlib/environments/cogames/__init__.py @@ -0,0 +1,12 @@ +from .environment import env_creator + +try: + import torch +except ImportError: + pass +else: + from .torch import Policy + try: + from .torch import Recurrent + except: + Recurrent = None diff --git a/pufferlib/environments/cogames/environment.py b/pufferlib/environments/cogames/environment.py new file mode 100644 index 000000000..4e54d23ec --- /dev/null +++ b/pufferlib/environments/cogames/environment.py @@ -0,0 +1,42 @@ +from pdb import set_trace as T +import numpy as np +import functools + +import gymnasium as gym + +import pufferlib +import pufferlib.emulation +import pufferlib.environments +from pufferlib.pufferlib import set_buffers + +from mettagrid.envs import MettaGridEnv + +def env_creator(name='machina_1'): + return functools.partial(make, name) + +def make(name, render_mode='rgb_array', buf=None, seed=0): + '''Atari creation function''' + from cogames import game + + # Load a game configuration + config = game.get_game(name) + + # Create environment + env = PufferMettaGridEnv(env_cfg=config) + set_buffers(env, buf) + return env + +class PufferMettaGridEnv(MettaGridEnv): + def reset(self, **kwargs): + obs, info = super().reset(**kwargs) + info = {k: v for k, v in pufferlib.unroll_nested_dict(info) + if 'action' not in k} + return obs, info + + def step(self, action): + obs, reward, terminated, truncated, info = super().step(action) + info = {k: v for k, v in 
pufferlib.unroll_nested_dict(info) + if 'action' not in k} + info = {k: v for k, v in info.items() if 'action' not in k} + return obs, reward, terminated, truncated, info + diff --git a/pufferlib/environments/cogames/torch.py b/pufferlib/environments/cogames/torch.py new file mode 100644 index 000000000..6c31428d6 --- /dev/null +++ b/pufferlib/environments/cogames/torch.py @@ -0,0 +1,134 @@ +import pufferlib.models + +from pufferlib.models import Default +#from pufferlib.models import LSTMWrapper as Recurrent + +import torch + +''' +class Policy(Default): + + def encode_observations(self, observations, state=None): + observations = observations.float() / 255 + return super().encode_observations(observations, state=state) +''' + +import numpy as np +import einops +import torch +from torch import nn +from torch.nn import functional as F + +import pufferlib.models + +class Recurrent(pufferlib.models.LSTMWrapper): + def __init__(self, env, policy, hidden_size=512): + super().__init__(env, policy, hidden_size) + +class Policy(nn.Module): + def __init__(self, env, cnn_channels=128, hidden_size=512, **kwargs): + super().__init__() + self.hidden_size = hidden_size + self.is_continuous = False + + self.out_width = 11 + self.out_height = 11 + self.num_layers = 22 + + self.network= nn.Sequential( + pufferlib.pytorch.layer_init( + nn.Conv2d(self.num_layers, cnn_channels, 5, stride=3)), + nn.ReLU(), + pufferlib.pytorch.layer_init( + nn.Conv2d(cnn_channels, cnn_channels, 3, stride=1)), + nn.ReLU(), + nn.Flatten(), + pufferlib.pytorch.layer_init(nn.Linear(cnn_channels, hidden_size//2)), + nn.ReLU(), + ) + + self.self_encoder = nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(self.num_layers, hidden_size//2)), + nn.ReLU(), + ) + + #max_vec = torch.tensor([ 1., 9., 1., 30., 1., 3., 255., 26., 1., 1., 1., 1., + # 1., 47., 3., 3., 2., 1., 1., 1., 1., 1.])[None, :, None, None] + max_vec = torch.tensor([9., 1., 1., 10., 3., 254., 1., 1., 235., 8., 9., 250., 29., 1., 1., 8., 1., 
1., 6., 3., 1., 2.])[None, :, None, None] + #max_vec = torch.ones(22)[None, :, None, None] + self.register_buffer('max_vec', max_vec) + + action_nvec = env.single_action_space.nvec + self.actor = nn.ModuleList([pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, n), std=0.01) for n in action_nvec]) + + self.value = pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, 1), std=1) + + def forward(self, observations, state=None): + hidden, lookup = self.encode_observations(observations) + actions, value = self.decode_actions(hidden, lookup) + return (actions, value), hidden + + def encode_observations(self, observations, state=None): + + token_observations = observations + B = token_observations.shape[0] + TT = 1 + if token_observations.dim() != 3: # hardcoding for shape [B, M, 3] + TT = token_observations.shape[1] + token_observations = einops.rearrange(token_observations, "b t m c -> (b t) m c") + + assert token_observations.shape[-1] == 3, f"Expected 3 channels per token. Got shape {token_observations.shape}" + token_observations[token_observations == 255] = 0 + + # coords_byte contains x and y coordinates in a single byte (first 4 bits are x, last 4 bits are y) + coords_byte = token_observations[..., 0].to(torch.uint8) + + # Extract x and y coordinate indices (0-15 range, but we need to make them long for indexing) + x_coord_indices = ((coords_byte >> 4) & 0x0F).long() # Shape: [B_TT, M] + y_coord_indices = (coords_byte & 0x0F).long() # Shape: [B_TT, M] + atr_indices = token_observations[..., 1].long() # Shape: [B_TT, M], ready for embedding + atr_values = token_observations[..., 2].float() # Shape: [B_TT, M] + + # In ObservationShaper we permute. Here, we create the observations pre-permuted. + # We'd like to pre-create this as part of initialization, but we don't know the batch size or time steps at + # that point. 
+ box_obs = torch.zeros( + (B * TT, 22, self.out_width, self.out_height), + dtype=atr_values.dtype, + device=token_observations.device, + ) + batch_indices = torch.arange(B * TT, device=token_observations.device).unsqueeze(-1).expand_as(atr_values) + + # Add bounds checking to prevent out-of-bounds access + valid_tokens = coords_byte != 0xFF + valid_tokens = valid_tokens & (x_coord_indices < self.out_width) & (y_coord_indices < self.out_height) + valid_tokens = valid_tokens & (atr_indices < 22) # Also check attribute indices + + box_obs[ + batch_indices[valid_tokens], + atr_indices[valid_tokens], + x_coord_indices[valid_tokens], + y_coord_indices[valid_tokens], + ] = atr_values[valid_tokens] + + observations = box_obs + + #max_vec = box_obs.max(0)[0].max(1)[0].max(1)[0] + #self.max_vec = torch.maximum(self.max_vec, max_vec[None, :, None, None]) + #if (np.random.rand() < 0.001): + # breakpoint() + + features = observations / self.max_vec + #mmax = features.max(0)[0].max(1)[0].max(1)[0] + #self.max_vec = torch.maximum(self.max_vec, mmax[None, :, None, None]) + self_features = self.self_encoder(features[:, :, 5, 5]) + cnn_features = self.network(features) + return torch.cat([self_features, cnn_features], dim=1) + + def decode_actions(self, hidden): + #hidden = self.layer_norm(hidden) + logits = [dec(hidden) for dec in self.actor] + value = self.value(hidden) + return logits, value diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f65880c3c..893cfcb62 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -837,6 +837,12 @@ def __init__(self, args, load_id=None, mode='async'): for k, v in pufferlib.unroll_nested_dict(args): neptune[k].append(v) + def init(self, args): + pass + + def log_cost(self, cost): + pass + def log(self, logs, step): for k, v in logs.items(): self.neptune[k].append(v, step=step) From cdab2785b9537c66df81b568709a8f229af2bea4 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 7 Oct 2025 21:45:33 +0000 Subject: [PATCH 
038/188] 3d --- pufferlib/ocean/plot/plot.c | 117 +++++++++++++++++++++++++++++++----- 1 file changed, 103 insertions(+), 14 deletions(-) diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c index 074ee5038..b41aa911e 100644 --- a/pufferlib/ocean/plot/plot.c +++ b/pufferlib/ocean/plot/plot.c @@ -1,8 +1,10 @@ +#include #include #include #include #include "raylib.h" + #define RAYGUI_IMPLEMENTATION #include "raygui.h" @@ -17,6 +19,7 @@ const float EMPTY = -4242.0f; #define SEP 4 #define SETTINGS_HEIGHT 20 +#define TOGGLE_WIDTH 60 #define DROPDOWN_WIDTH 200 typedef struct PlotArgs { @@ -40,6 +43,7 @@ typedef struct PlotArgs { Color axis_color; char* x_label; char* y_label; + char* z_label; Font font; } PlotArgs; @@ -64,12 +68,9 @@ PlotArgs DEFAULT_PLOT_ARGS = { .axis_color = PUFF_WHITE, .x_label = "Cost", .y_label = "Score", + .z_label = "Train/Learning Rate", }; -#include -#include -#include - const char* format_tick_label(double value) { static char buffer[32]; int precision = 2; @@ -180,6 +181,24 @@ void draw_axes(PlotArgs args) { } } +void draw_axes3(PlotArgs args) { + DrawLine3D( + (Vector3){-10.0f, 0, 0}, + (Vector3){10.0f, 0, 0}, + RED + ); + DrawLine3D( + (Vector3){0, -10.0f, 0}, + (Vector3){0, 10.0f, 0}, + GREEN + ); + DrawLine3D( + (Vector3){0, 0, -10.0f}, + (Vector3){0, 0, 10.0f}, + BLUE + ); +} + float ary_min(float* ary, int num) { float min = ary[0]; for (int i=1; i 20) { + map_count = 20; + } char **items = malloc(map_count * sizeof(char *)); if (!items) { printf("Memory allocation error\n"); @@ -389,25 +445,34 @@ int main(void) { // Initialize Raylib - InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); ClearBackground(PUFF_BACKGROUND); SetTargetFPS(60); - + Camera3D camera = (Camera3D){ 0 }; + camera.position = (Vector3){ 10.0f, 10.0f, 10.0f }; + camera.target = (Vector3){ 0.0f, 0.0f, 0.0f }; + camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; + camera.fovy = 45.0f; + 
camera.projection = CAMERA_PERSPECTIVE; PlotArgs args1 = DEFAULT_PLOT_ARGS; args1.font = GetFontDefault(); RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); bool fig1_x_active = false; - int fig1_x_idx = 1; + int fig1_x_idx = 2; + bool fig1_x_log = true; bool fig1_y_active = false; - int fig1_y_idx = 0; + int fig1_y_idx = 6; + bool fig1_y_log = false; + bool fig1_z_active = false; + int fig1_z_idx = 1; + bool fig1_z_log = true; PlotArgs args2 = DEFAULT_PLOT_ARGS; args2.font = GetFontDefault(); RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); bool fig2_x_active = false; - int fig2_x_idx = 2; + int fig2_x_idx = 1; bool fig2_y_active = false; int fig2_y_idx = 0; @@ -432,9 +497,11 @@ int main(void) { float* x; float* y; + float* z; int num_points; char* x_label; char* y_label; + char* z_label; while (!WindowShouldClose()) { BeginDrawing(); @@ -442,32 +509,54 @@ int main(void) { x_label = items[fig1_x_idx]; y_label = items[fig1_y_idx]; + z_label = items[fig1_z_idx]; args1.x_label = x_label; args1.y_label = y_label; + args1.z_label = z_label; x = get_values(map, map_count, x_label, &num_points); y = get_values(map, map_count, y_label, &num_points); + z = get_values(map, map_count, z_label, &num_points); args1.x_min = ary_min(x, num_points); args1.x_max = ary_max(x, num_points); args1.y_min = ary_min(y, num_points); args1.y_max = ary_max(y, num_points); + args1.z_min = ary_min(z, num_points); + args1.z_max = ary_max(z, num_points); + float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; + float y_mid = fig1_y_log ? (log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; + float z_mid = fig1_z_log ? 
(log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; + camera.target = (Vector3){x_mid, y_mid, z_mid}; BeginTextureMode(fig1); ClearBackground(PUFF_BACKGROUND); - plot(x, y, num_points, args1); - draw_axes(args1); + BeginMode3D(camera); + UpdateCamera(&camera, CAMERA_ORBITAL); + plot3(x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, num_points, args1); + draw_axes3(args1); + EndMode3D(); EndTextureMode(); DrawTextureRec( fig1.texture, - (Rectangle){ 0, 0, fig1.texture.width, -fig1.texture.height }, + (Rectangle){0, 0, fig1.texture.width, -fig1.texture.height }, (Vector2){ 0, SETTINGS_HEIGHT }, WHITE ); Rectangle fig1_x_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; if (GuiDropdownBox(fig1_x_rect, options, &fig1_x_idx, fig1_x_active)){ fig1_x_active = !fig1_x_active; } - Rectangle fig1_y_rect = {DROPDOWN_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + Rectangle fig1_x_check_rect = {DROPDOWN_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig1_x_check_rect, "Log X", &fig1_x_log); + Rectangle fig1_y_rect = {DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; if (GuiDropdownBox(fig1_y_rect, options, &fig1_y_idx, fig1_y_active)){ fig1_y_active = !fig1_y_active; } + Rectangle fig1_y_check_rect = {2*DROPDOWN_WIDTH+TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig1_y_check_rect, "Log Y", &fig1_y_log); + Rectangle fig1_z_rect = {2*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_z_rect, options, &fig1_z_idx, fig1_z_active)){ + fig1_z_active = !fig1_z_active; + } + Rectangle fig1_z_check_rect = {3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig1_z_check_rect, "Log Z", &fig1_z_log); x_label = items[fig2_x_idx]; y_label = items[fig2_y_idx]; @@ -486,7 +575,7 @@ int main(void) { EndTextureMode(); DrawTextureRec( fig2.texture, - (Rectangle){0, 0, fig2.texture.width, -fig2.texture.height }, + (Rectangle){ 0, 0, 
fig2.texture.width, -fig2.texture.height }, (Vector2){ fig1.texture.width, SETTINGS_HEIGHT }, WHITE ); Rectangle fig2_x_rect = {fig1.texture.width, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; From 58e34236d956f0e0834a8ac2a0b6753d5cbf5a7f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 8 Oct 2025 14:56:25 +0000 Subject: [PATCH 039/188] Cap snake score --- pufferlib/ocean/snake/snake.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/snake/snake.h b/pufferlib/ocean/snake/snake.h index 30a9a4d40..e7d31b832 100644 --- a/pufferlib/ocean/snake/snake.h +++ b/pufferlib/ocean/snake/snake.h @@ -232,7 +232,7 @@ void step_snake(CSnake* env, int i) { env->rewards[i] = env->reward_death; env->snake_logs[i].episode_return += env->reward_death; env->snake_logs[i].score = env->snake_lengths[i]; - env->snake_logs[i].perf = env->snake_logs[i].score / env->snake_logs[i].episode_length; + env->snake_logs[i].perf = fminf(env->snake_logs[i].score/120.0f, 1.0f); add_log(env, i); spawn_snake(env, i); return; From a09a8cf8a28a7ec800c9983208e97da2e4f70fa5 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 8 Oct 2025 15:26:56 +0000 Subject: [PATCH 040/188] rware config --- pufferlib/config/ocean/rware.ini | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pufferlib/config/ocean/rware.ini b/pufferlib/config/ocean/rware.ini index 705e0af3e..791c426f4 100644 --- a/pufferlib/config/ocean/rware.ini +++ b/pufferlib/config/ocean/rware.ini @@ -5,10 +5,10 @@ policy_name = Policy rnn_name = Recurrent [vec] -num_envs = 8 +num_envs = 4 [env] -num_envs = 128 +num_envs = 256 map_choice = 2 num_agents = 8 num_requested_shelves = 8 @@ -17,10 +17,3 @@ num_requested_shelves = 8 total_timesteps = 100_000_000 learning_rate = 0.05 minibatch_size = 32768 - -[sweep.train.total_timesteps] -distribution = log_normal -min = 3e7 -max = 3e8 -mean = 1e8 -scale = 0.25 From eb3fec44732d0684bf4c3dba3690b67572e2ddb2 Mon Sep 17 00:00:00 2001 From: Joseph Suarez 
Date: Wed, 8 Oct 2025 19:44:28 +0000 Subject: [PATCH 041/188] Two small profile scripts --- profile_jax.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ profile_torch.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 profile_jax.py create mode 100644 profile_torch.py diff --git a/profile_jax.py b/profile_jax.py new file mode 100644 index 000000000..8d5d51a43 --- /dev/null +++ b/profile_jax.py @@ -0,0 +1,67 @@ +import jax +import jax.numpy as jnp +from jax import jit, random, lax +import timeit + +INPUT_SIZE = 16 +HIDDEN_SIZE = 128 +OUTPUT_SIZE = 16 +B = 2048 +dtype = jnp.bfloat16 +inner_loops = 100 # Number of inner iterations to amortize overhead + +def init_params(key): + keys = random.split(key, 3) + # Use uniform initialization to match PyTorch's Kaiming uniform for ReLU + bound1 = jnp.sqrt(6 / INPUT_SIZE) + w1 = random.uniform(keys[0], shape=(INPUT_SIZE, HIDDEN_SIZE), minval=-bound1, maxval=bound1, dtype=dtype) + b1 = jnp.zeros(HIDDEN_SIZE, dtype=dtype) + bound2 = jnp.sqrt(6 / HIDDEN_SIZE) + w2 = random.uniform(keys[1], shape=(HIDDEN_SIZE, HIDDEN_SIZE), minval=-bound2, maxval=bound2, dtype=dtype) + b2 = jnp.zeros(HIDDEN_SIZE, dtype=dtype) + bound3 = jnp.sqrt(6 / HIDDEN_SIZE) + w3 = random.uniform(keys[2], shape=(HIDDEN_SIZE, OUTPUT_SIZE), minval=-bound3, maxval=bound3, dtype=dtype) + b3 = jnp.zeros(OUTPUT_SIZE, dtype=dtype) + return {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2, 'w3': w3, 'b3': b3} + +def model(params, x): + precision = lax.Precision.HIGH # Use HIGH precision for 4090 to leverage Tensor Cores + h = jnp.maximum(jnp.dot(x, params['w1'], precision=precision) + params['b1'], 0) + h = jnp.maximum(jnp.dot(h, params['w2'], precision=precision) + params['b2'], 0) + return jnp.dot(h, params['w3'], precision=precision) + params['b3'] + +# Manual FLOPs calculation (ignores bias adds and ReLUs as negligible) +flops_per_forward = ( + 2 * B * INPUT_SIZE * HIDDEN_SIZE + # First matmul + 2 * B * 
HIDDEN_SIZE * HIDDEN_SIZE + # Second matmul + 2 * B * HIDDEN_SIZE * OUTPUT_SIZE # Third matmul +) + +# Create concrete inputs +key = random.key(0) +params = init_params(key) +batch = random.normal(random.key(1), (B, INPUT_SIZE), dtype=dtype) + +# Define a jitted multi-step function with lax.scan for better optimization +@jit +def multi_step(params, batch): + def body_fun(carry, _): + y = model(params, batch) + carry += y.sum() # Forces computation without noise + return carry, None + carry, _ = lax.scan(body_fun, jnp.array(0.0, dtype=jnp.float32), None, length=inner_loops) + return carry + +# Warmup +for _ in range(10): + _ = multi_step(params, batch).block_until_ready() + +# Timing +def run(): + return multi_step(params, batch).block_until_ready() + +t = timeit.timeit(run, number=10) +cost = t / 10 / inner_loops # Average time per forward pass + +FLOPS = flops_per_forward / cost +print(f'TFLOPS: {FLOPS / 1e12:.2f}') diff --git a/profile_torch.py b/profile_torch.py new file mode 100644 index 000000000..e144a1127 --- /dev/null +++ b/profile_torch.py @@ -0,0 +1,64 @@ +import torch +from torch import nn +from torch.utils.benchmark import Timer +from torch.utils.flop_counter import FlopCounterMode + +from torch.backends import cudnn +cudnn.benchmark = True +cudnn.deterministic = False +cudnn.benchmark_limit = 32 + +torch.set_float32_matmul_precision('high') +torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True + +INPUT_SIZE = 16 +HIDDEN_SIZE = 128 +OUTPUT_SIZE = 16 +B = 2048 +dtype = torch.bfloat16 +inner_loops = 100 # Number of inner iterations to amortize overhead + +# Define the model with explicit Kaiming uniform initialization to match JAX +model = torch.nn.Sequential( + torch.nn.Linear(INPUT_SIZE, HIDDEN_SIZE), + torch.nn.ReLU(), + torch.nn.Linear(HIDDEN_SIZE, HIDDEN_SIZE), + torch.nn.ReLU(), + torch.nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE), +).cuda().to(dtype) + +# Create input batch +batch = torch.randn(B, INPUT_SIZE).cuda().to(dtype) + +# Define 
a multi-step function to run multiple forwards in one compiled graph +@torch.compile(mode='max-autotune') +def multi_step(model, batch, inner_loops): + with torch.no_grad(): + carry = torch.tensor(0.0, dtype=torch.float32, device='cuda') + for i in range(inner_loops): + y = model(batch) + carry = carry + y.sum() + + return carry + +# Manual FLOPs calculation to match JAX (ignores bias adds and ReLUs as negligible) +flops = ( + 2 * B * INPUT_SIZE * HIDDEN_SIZE + + 2 * B * HIDDEN_SIZE * HIDDEN_SIZE + + 2 * B * HIDDEN_SIZE * OUTPUT_SIZE +) + +# Warmup +for _ in range(10): + _ = multi_step(model, batch, inner_loops) + +# Timing +timer = Timer( + stmt='multi_step(model, batch, inner_loops)', + globals={'multi_step': multi_step, 'model': model, 'batch': batch, 'inner_loops': inner_loops} +) +output = timer.timeit(50) + +cost = output.mean / inner_loops # Average time per forward pass (fixed from times[0] to mean) +FLOPS = flops / cost +print(f'TFLOPS: {FLOPS / 1e12:.2f}') From 1c4fcf876de2490ce8f118be4a3ba9dd02995fd2 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 8 Oct 2025 20:25:22 +0000 Subject: [PATCH 042/188] update cogames --- pufferlib/config/cogames.ini | 2 +- pufferlib/environments/cogames/environment.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pufferlib/config/cogames.ini b/pufferlib/config/cogames.ini index 19e2eff39..1f556efaf 100644 --- a/pufferlib/config/cogames.ini +++ b/pufferlib/config/cogames.ini @@ -1,6 +1,6 @@ [base] package = cogames -env_name = assembler_2_complex machina_1 training_facility_1 assembler_1_simple +env_name = assembler_2_complex machina_1 training_facility_1 assembler_1_simple training_rotation_easy_shaped policy_name = Policy rnn_name = Recurrent diff --git a/pufferlib/environments/cogames/environment.py b/pufferlib/environments/cogames/environment.py index 4e54d23ec..61f51e56f 100644 --- a/pufferlib/environments/cogames/environment.py +++ b/pufferlib/environments/cogames/environment.py @@ -19,7 
+19,7 @@ def make(name, render_mode='rgb_array', buf=None, seed=0): from cogames import game # Load a game configuration - config = game.get_game(name) + config = game.get_mission(name)[0] # Create environment env = PufferMettaGridEnv(env_cfg=config) From 86dcf77c12be6d1e02f9880e6e999106c6dcab76 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 14:50:51 +0000 Subject: [PATCH 043/188] Impulse wars config --- pufferlib/config/ocean/impulse_wars.ini | 85 +++---------------------- pufferlib/ocean/torch.py | 7 +- 2 files changed, 13 insertions(+), 79 deletions(-) diff --git a/pufferlib/config/ocean/impulse_wars.ini b/pufferlib/config/ocean/impulse_wars.ini index d4d1ad7b5..ddb8d3de5 100644 --- a/pufferlib/config/ocean/impulse_wars.ini +++ b/pufferlib/config/ocean/impulse_wars.ini @@ -7,7 +7,6 @@ max_suggestion_cost = 10_800 [policy] cnn_channels = 64 -input_size = 512 hidden_size = 512 # These must match what's set in env below @@ -16,12 +15,12 @@ num_drones = 2 is_training = True [vec] -num_envs = 16 -num_workers = 16 -batch_size = 4 +num_envs = 4 +#num_workers = 4 +#batch_size = 4 [env] -num_envs = 256 +num_envs = 1024 num_drones = 2 num_agents = 1 enable_teams = False @@ -40,79 +39,15 @@ compile_mode = reduce-overhead compile_fullgraph = False device = cuda -[sweep.env.num_envs] -distribution = uniform_pow2 -min = 16 -max = 512 -mean = 128 -scale = auto - -[sweep.train.total_timesteps] -distribution = log_normal -min = 250_000_000 -max = 1_500_000_000 -mean = 500_000_000 -scale = time +[sweep] +downsample = 10 +max_cost = 900 -[sweep.train.batch_size] -distribution = uniform_pow2 -min = 65_536 -max = 1_048_576 -mean = 262_144 -scale = auto - -[sweep.train.bptt_horizon] +[sweep.env.num_envs] distribution = uniform_pow2 -min = 64 -max = 256 +min = 1 +max = 1024 mean = 128 scale = auto -[sweep.train.minibatch_size] -distribution = uniform_pow2 -min = 1024 -max = 262_144 -mean = 16_384 -scale = auto -[sweep.train.learning_rate] -distribution = log_normal 
-min = 0.00001 -mean = 0.001 -max = 0.1 -scale = 0.5 - -[sweep.train.ent_coef] -distribution = log_normal -min = 0.000001 -mean = 0.001 -max = 0.2 -scale = auto - -[sweep.train.gamma] -distribution = logit_normal -min = 0.8 -mean = 0.98 -max = 0.99999 -scale = auto - -[sweep.train.gae_lambda] -distribution = logit_normal -min = 0.6 -mean = 0.93 -max = 0.995 -scale = auto - -[sweep.train.vf_coef] -distribution = uniform -min = 0.0 -max = 5.0 -mean = 1.0 -scale = auto - -[sweep.train.max_grad_norm] -distribution = uniform -min = 0.0 -mean = 1.0 -max = 5.0 -scale = auto diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index f975b5c22..3dd3ca697 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -582,8 +582,8 @@ def decode_actions(self, flat_hidden): class ImpulseWarsLSTM(Recurrent): - def __init__(self, env: pufferlib.PufferEnv, policy: nn.Module, input_size: int = 512, hidden_size: int = 512): - super().__init__(env, policy, input_size, hidden_size) + def __init__(self, env, policy, hidden_size=512, **kwargs): + super().__init__(env, policy, hidden_size) class ImpulseWarsPolicy(nn.Module): @@ -592,7 +592,6 @@ def __init__( env: pufferlib.PufferEnv, cnn_channels: int = 64, weapon_type_embedding_dims: int = 2, - input_size: int = 512, hidden_size: int = 512, batch_size: int = 131_072, num_drones: int = 2, @@ -686,7 +685,7 @@ def __init__( ) self.encoder = nn.Sequential( - layer_init(nn.Linear(featuresSize, input_size)), + layer_init(nn.Linear(featuresSize, hidden_size)), nn.ReLU(), ) From f94dc05037b9404f63f9be472b4ae0ac77572b4d Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 14:57:30 +0000 Subject: [PATCH 044/188] nmmo3 config --- pufferlib/config/ocean/nmmo3.ini | 12 +++--------- pufferlib/ocean/torch.py | 4 ++-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pufferlib/config/ocean/nmmo3.ini b/pufferlib/config/ocean/nmmo3.ini index c04c77dc3..f1440da7f 100644 --- 
a/pufferlib/config/ocean/nmmo3.ini +++ b/pufferlib/config/ocean/nmmo3.ini @@ -5,7 +5,7 @@ policy_name = NMMO3 rnn_name = NMMO3LSTM [vec] -num_envs = 8 +num_envs = 1 [env] reward_combat_level = 1.0 @@ -13,7 +13,7 @@ reward_prof_level = 1.0 reward_item_level = 1.0 reward_market = 0.0 reward_death = -1.0 -num_envs = 1 +num_envs = 8 [train] total_timesteps = 107000000000 @@ -31,6 +31,7 @@ max_minibatch_size = 32768 [sweep] metric = min_comb_prof +max_cost = 900 [sweep.env.num_envs] distribution = uniform_pow2 @@ -39,13 +40,6 @@ max = 8 mean = 4 scale = 0.5 -[sweep.train.total_timesteps] -distribution = log_normal -min = 2e8 -max = 1e9 -mean = 5e8 -scale = 0.5 - [sweep.env.reward_combat_level] distribution = uniform min = 0.0 diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index 3dd3ca697..e92f08914 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -53,8 +53,8 @@ def decode_actions(self, flat_hidden, state=None): return action, value class NMMO3LSTM(pufferlib.models.LSTMWrapper): - def __init__(self, env, policy, input_size=512, hidden_size=512): - super().__init__(env, policy, input_size, hidden_size) + def __init__(self, env, policy, hidden_size=512): + super().__init__(env, policy, hidden_size) class NMMO3(nn.Module): def __init__(self, env, hidden_size=512, output_size=512, **kwargs): From bcb6a9fe6fcee02c09bb1d780f778f9c9f9f64ed Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 15:00:05 +0000 Subject: [PATCH 045/188] nmmo3 net params --- pufferlib/ocean/torch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index e92f08914..8990563c2 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -57,7 +57,7 @@ def __init__(self, env, policy, hidden_size=512): super().__init__(env, policy, hidden_size) class NMMO3(nn.Module): - def __init__(self, env, hidden_size=512, output_size=512, **kwargs): + def __init__(self, env, 
hidden_size=512, **kwargs): super().__init__() self.hidden_size = hidden_size #self.dtype = pufferlib.pytorch.nativize_dtype(env.emulated) @@ -88,8 +88,8 @@ def __init__(self, env, hidden_size=512, output_size=512, **kwargs): self.layer_norm = nn.LayerNorm(hidden_size) self.actor = pufferlib.pytorch.layer_init( - nn.Linear(output_size, self.num_actions), std=0.01) - self.value_fn = pufferlib.pytorch.layer_init(nn.Linear(output_size, 1), std=1) + nn.Linear(hidden_size, self.num_actions), std=0.01) + self.value_fn = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, 1), std=1) def forward(self, x, state=None): hidden = self.encode_observations(x) From af075046516c32d991cab1c8aebdc5b89d62736f Mon Sep 17 00:00:00 2001 From: Andrew LeFevre Date: Wed, 20 Aug 2025 18:04:12 -0400 Subject: [PATCH 046/188] fuck this --- pufferlib/ocean/impulse_wars/binding.c | 2 + pufferlib/ocean/impulse_wars/game.h | 204 +++++++++++++++++-------- pufferlib/ocean/impulse_wars/types.h | 2 + 3 files changed, 143 insertions(+), 65 deletions(-) diff --git a/pufferlib/ocean/impulse_wars/binding.c b/pufferlib/ocean/impulse_wars/binding.c index d51837b73..13d0880b5 100644 --- a/pufferlib/ocean/impulse_wars/binding.c +++ b/pufferlib/ocean/impulse_wars/binding.c @@ -131,6 +131,8 @@ static int my_log(PyObject *dict, Log *log) { assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); + assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); + assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); assign_to_dict(dict, droneLog(buf, i, "wins"), log->stats[i].wins); // useful for debugging weapon balance, but really slows down diff --git a/pufferlib/ocean/impulse_wars/game.h b/pufferlib/ocean/impulse_wars/game.h index 7a8260d23..65e76f12e 100644 --- 
a/pufferlib/ocean/impulse_wars/game.h +++ b/pufferlib/ocean/impulse_wars/game.h @@ -102,10 +102,12 @@ bool isOverlappingAABB(const iwEnv *e, const b2Vec2 pos, const float distance, c return ctx.overlaps; } -// TODO: store a shape proxy in entities? -b2ShapeProxy makeDistanceProxyFromType(const enum entityType type, bool *isCircle) { +b2ShapeProxy makeDistanceProxy(const entity *ent, bool *isCircle) { b2ShapeProxy proxy = {0}; - switch (type) { + float extent = 0.0f; + wallEntity *wall = NULL; + + switch (ent->type) { case DRONE_ENTITY: *isCircle = true; proxy.count = 1; @@ -116,42 +118,44 @@ b2ShapeProxy makeDistanceProxyFromType(const enum entityType type, bool *isCircl proxy.count = 1; proxy.radius = DRONE_SHIELD_RADIUS; break; + case PROJECTILE_ENTITY: + *isCircle = true; + const projectileEntity *proj = ent->entity; + proxy.count = 1; + proxy.radius = proj->weaponInfo->radius; + break; case WEAPON_PICKUP_ENTITY: + extent = PICKUP_THICKNESS / 2.0f; + proxy.count = 4; - proxy.points[0] = (b2Vec2){.x = -PICKUP_THICKNESS / 2.0f, .y = -PICKUP_THICKNESS / 2.0f}; - proxy.points[1] = (b2Vec2){.x = -PICKUP_THICKNESS / 2.0f, .y = +PICKUP_THICKNESS / 2.0f}; - proxy.points[2] = (b2Vec2){.x = +PICKUP_THICKNESS / 2.0f, .y = -PICKUP_THICKNESS / 2.0f}; - proxy.points[3] = (b2Vec2){.x = +PICKUP_THICKNESS / 2.0f, .y = +PICKUP_THICKNESS / 2.0f}; + proxy.points[0] = (b2Vec2){.x = -extent, .y = -extent}; + proxy.points[1] = (b2Vec2){.x = -extent, .y = +extent}; + proxy.points[2] = (b2Vec2){.x = +extent, .y = -extent}; + proxy.points[3] = (b2Vec2){.x = +extent, .y = +extent}; break; case STANDARD_WALL_ENTITY: case BOUNCY_WALL_ENTITY: case DEATH_WALL_ENTITY: + extent = WALL_THICKNESS; + wall = ent->entity; + if (wall->isFloating) { + extent = FLOATING_WALL_THICKNESS; + } + extent /= 2.0f; + proxy.count = 4; - proxy.points[0] = (b2Vec2){.x = -FLOATING_WALL_THICKNESS / 2.0f, .y = -FLOATING_WALL_THICKNESS / 2.0f}; - proxy.points[1] = (b2Vec2){.x = -FLOATING_WALL_THICKNESS / 2.0f, .y = 
+FLOATING_WALL_THICKNESS / 2.0f}; - proxy.points[2] = (b2Vec2){.x = +FLOATING_WALL_THICKNESS / 2.0f, .y = -FLOATING_WALL_THICKNESS / 2.0f}; - proxy.points[3] = (b2Vec2){.x = +FLOATING_WALL_THICKNESS / 2.0f, .y = +FLOATING_WALL_THICKNESS / 2.0f}; + proxy.points[0] = (b2Vec2){.x = -extent, .y = -extent}; + proxy.points[1] = (b2Vec2){.x = -extent, .y = +extent}; + proxy.points[2] = (b2Vec2){.x = +extent, .y = -extent}; + proxy.points[3] = (b2Vec2){.x = +extent, .y = +extent}; break; default: - ERRORF("unknown entity type for shape distance: %d", type); + ERRORF("unknown entity type for shape distance: %d", ent->type); } return proxy; } -b2ShapeProxy makeDistanceProxy(const entity *ent, bool *isCircle) { - if (ent->type == PROJECTILE_ENTITY) { - *isCircle = true; - b2ShapeProxy proxy = {0}; - const projectileEntity *proj = ent->entity; - proxy.count = 1; - proxy.radius = proj->weaponInfo->radius; - return proxy; - } - - return makeDistanceProxyFromType(ent->type, isCircle); -} - b2Transform entityTransform(const entity *ent) { b2Transform transform; wallEntity *wall; @@ -506,6 +510,7 @@ entity *createWall(iwEnv *e, const b2Vec2 pos, const float width, const float he if (floating) { cc_array_add(e->floatingWalls, wall); + create_array(&wall->physicsTracking, 16); } else { cc_array_add(e->walls, wall); } @@ -521,6 +526,14 @@ void destroyWall(iwEnv *e, wallEntity *wall, const bool full) { cell->ent = NULL; } + if (wall->isFloating) { + for (size_t i = 0; i < cc_array_size(wall->physicsTracking); i++) { + physicsStepInfo *physicsStep = safe_array_get_at(wall->physicsTracking, i); + fastFree(physicsStep); + } + cc_array_destroy(wall->physicsTracking); + } + b2DestroyBody(wall->bodyID); fastFree(wall); } @@ -841,6 +854,7 @@ void destroyDroneShield(iwEnv *e, shieldEntity *shield, const bool createPieces) droneAddEnergy(drone, DRONE_SHIELD_BREAK_ENERGY_COST); } drone->shield = NULL; + e->stats[drone->idx].ownShieldBroken++; b2DestroyBody(shield->bodyID); 
b2DestroyShape(shield->bufferShapeID, false); @@ -880,27 +894,27 @@ void destroyDrone(iwEnv *e, droneEntity *drone) { fastFree(drone); } -void droneApplyForce(const iwEnv *e, const droneEntity *drone, const b2Vec2 force, const uint8_t srcIdx) { - b2Body_ApplyForceToCenter(drone->bodyID, force, true); +void applyTrackedForce(const iwEnv *e, const b2BodyId bodyID, CC_Array *physicsTracking, const b2Vec2 force, const uint8_t srcIdx) { + b2Body_ApplyForceToCenter(bodyID, force, true); physicsStepInfo *physicsStep = fastCalloc(1, sizeof(physicsStepInfo)); physicsStep->srcIdx = srcIdx; physicsStep->force = force; physicsStep->step = e->episodeLength; - cc_array_add(drone->physicsTracking, physicsStep); + cc_array_add(physicsTracking, physicsStep); } -void droneTrackImpulse(const iwEnv *e, const droneEntity *drone, const b2Vec2 impulse, const uint8_t srcIdx) { +void trackImpulse(const iwEnv *e, CC_Array *physicsTracking, const b2Vec2 impulse, const uint8_t srcIdx) { physicsStepInfo *physicsStep = fastCalloc(1, sizeof(physicsStepInfo)); physicsStep->srcIdx = srcIdx; physicsStep->impulse = impulse; physicsStep->step = e->episodeLength; - cc_array_add(drone->physicsTracking, physicsStep); + cc_array_add(physicsTracking, physicsStep); } -void droneApplyImpulse(const iwEnv *e, const droneEntity *drone, const b2Vec2 impulse, const uint8_t srcIdx) { - b2Body_ApplyLinearImpulseToCenter(drone->bodyID, impulse, true); - droneTrackImpulse(e, drone, impulse, srcIdx); +void applyTrackedImpulse(const iwEnv *e, const b2BodyId bodyID, CC_Array *physicsTracking, const b2Vec2 impulse, const uint8_t srcIdx) { + b2Body_ApplyLinearImpulseToCenter(bodyID, impulse, true); + trackImpulse(e, physicsTracking, impulse, srcIdx); } void droneTrackBrake(const iwEnv *e, const droneEntity *drone) { @@ -921,16 +935,20 @@ void droneChangeWeapon(const iwEnv *e, droneEntity *drone, const enum weaponType drone->ammo = weaponAmmo(e->defaultWeapon->type, drone->weaponInfo->type); } -void findDroneKiller(const 
iwEnv *e, droneEntity *drone) { +int8_t findBiggestContributor(iwEnv *e, const enum entityType type, const CC_Array *physicsTracking, const b2Vec2 lastVelocity, float *maxMoveContrib) { b2Vec2 contrib[e->numDrones]; memset(contrib, 0x0, e->numDrones * sizeof(b2Vec2)); uint16_t step = 0; bool braking = false; + float defaultDamping = DRONE_LINEAR_DAMPING; + if (type != DRONE_ENTITY) { + defaultDamping = FLOATING_WALL_DAMPING; + } float droneDamping = DRONE_LINEAR_DAMPING; // calculate the contribution of forces and impulses of each drone - for (size_t i = 0; i < cc_array_size(drone->physicsTracking); i++) { - physicsStepInfo *physicsStep = safe_array_get_at(drone->physicsTracking, i); + for (size_t i = 0; i < cc_array_size(physicsTracking); i++) { + physicsStepInfo *physicsStep = safe_array_get_at(physicsTracking, i); // if the step has changed, apply damping to contributions if (physicsStep->step != step) { const uint16_t stepDiff = physicsStep->step - step; @@ -947,7 +965,7 @@ void findDroneKiller(const iwEnv *e, droneEntity *drone) { if (braking) { droneDamping = DRONE_BRAKE_DAMPING_COEF; } else { - droneDamping = DRONE_LINEAR_DAMPING; + droneDamping = defaultDamping; } } @@ -960,8 +978,8 @@ void findDroneKiller(const iwEnv *e, droneEntity *drone) { // determine the killer by finding the drone that pushed the dead // drone towards the wall that killed it the most - const b2Vec2 deathNormal = b2Normalize(drone->lastVelocity); - DEBUG_LOGF("> death normal (%f, %f) velocity (%f, %f)", deathNormal.x, deathNormal.y, drone->lastVelocity.x, drone->lastVelocity.y); + const b2Vec2 deathNormal = b2Normalize(lastVelocity); + DEBUG_LOGF("> death normal (%f, %f) velocity (%f, %f)", deathNormal.x, deathNormal.y, lastVelocity.x, lastVelocity.y); float maxContrib = -FLT_MAX; int8_t killer = -1; for (uint8_t i = 0; i < e->numDrones; i++) { @@ -981,24 +999,51 @@ void findDroneKiller(const iwEnv *e, droneEntity *drone) { killer = i; } } + if (killer != -1) { + *maxMoveContrib = 
maxContrib; + } + return killer; +} + +void findDroneKiller(iwEnv *e, droneEntity *drone, const wallEntity *killWall) { + float maxMoveContrib = -FLT_MAX; + DEBUG_LOG("finding drone killer"); + int8_t killer = findBiggestContributor(e, DRONE_ENTITY, drone->physicsTracking, drone->lastVelocity, &maxMoveContrib); + if (killWall != NULL && killWall->isFloating) { + float wallContrib = -FLT_MAX; + DEBUG_LOG("finding mover of floating death wall"); + const int8_t wallMover = findBiggestContributor(e, DEATH_WALL_ENTITY, killWall->physicsTracking, killWall->velocity, &wallContrib); + if (wallContrib > maxMoveContrib) { + DEBUG_LOGF(">>> drone %d killed by drone %d pushing floating death wall", drone->idx, wallMover); + killer = wallMover; + } + } + if (killer == -1) { + DEBUG_LOGF(">>> drone %d killed by UNKNOWN", drone->idx); + e->stats[drone->idx].unknownKills++; return; } DEBUG_LOGF(">>> drone %d killed by drone %d", drone->idx, killer); + if (killer == drone->idx) { + e->stats[drone->idx].selfKills++; + } else { + e->stats[killer].kills++; + } drone->killedBy = killer; droneEntity *killerDrone = safe_array_get_at(e->drones, killer); killerDrone->killed[drone->idx] = true; } -void killDrone(iwEnv *e, droneEntity *drone) { +void killDrone(iwEnv *e, droneEntity *drone, const wallEntity *killWall) { if (drone->dead || drone->livesLeft == 0) { return; } DEBUG_LOGF("drone %d died", drone->idx); - findDroneKiller(e, drone); + findDroneKiller(e, drone, killWall); drone->livesLeft--; drone->dead = true; @@ -1273,7 +1318,6 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { ctx->e->stats[drone->idx].totalOwnShotsTaken++; DEBUG_LOGF("drone %d hit itself with explosion from weapon %d", drone->idx, ctx->projectile->weaponInfo->type); } - ctx->parentDrone->stepInfo.explosionHit[drone->idx] = true; if (ctx->isBurst) { DEBUG_LOGF("drone %d hit drone %d with burst", ctx->parentDrone->idx, drone->idx); ctx->e->stats[ctx->parentDrone->idx].burstsHit++; @@ -1284,7 +1328,6 @@ bool 
explodeCallback(b2ShapeId shapeID, void *context) { ctx->e->stats[ctx->parentDrone->idx].totalShotsHit++; DEBUG_LOGF("drone %d hit by explosion from weapon %d from drone %d", drone->idx, ctx->projectile->weaponInfo->type, ctx->parentDrone->idx); } - drone->stepInfo.explosionTaken[ctx->parentDrone->idx] = true; transform.p = drone->pos; transform.q = b2Rot_identity; break; @@ -1318,6 +1361,7 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { b2SimplexCache cache = {0}; const b2DistanceOutput output = b2ShapeDistance(&input, &cache, NULL, 0); + if (output.distance > ctx->def->radius) { return true; } @@ -1439,7 +1483,11 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { case STANDARD_WALL_ENTITY: case BOUNCY_WALL_ENTITY: case DEATH_WALL_ENTITY: - b2Body_ApplyLinearImpulse(bodyID, impulse, output.pointA, true); + if (wall->isFloating) { + applyTrackedImpulse(ctx->e, wall->bodyID, wall->physicsTracking, impulse, ctx->parentDrone->idx); + } else { + b2Body_ApplyLinearImpulse(bodyID, impulse, output.pointA, true); + } wall->velocity = b2Body_GetLinearVelocity(wall->bodyID); break; case PROJECTILE_ENTITY: @@ -1455,7 +1503,7 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { break; case DRONE_ENTITY: - droneApplyImpulse(ctx->e, drone, impulse, ctx->parentDrone->idx); + applyTrackedImpulse(ctx->e, drone->bodyID, drone->physicsTracking, impulse, ctx->parentDrone->idx); drone->lastVelocity = drone->velocity; drone->velocity = b2Body_GetLinearVelocity(drone->bodyID); @@ -1475,6 +1523,10 @@ bool explodeCallback(b2ShapeId shapeID, void *context) { } } + const float explosionStrength = b2AbsFloat(b2Length(impulse)); + ctx->parentDrone->stepInfo.explosionHit[drone->idx] += explosionStrength; + drone->stepInfo.explosionTaken[ctx->parentDrone->idx] += explosionStrength; + break; default: ERRORF("unknown entity type for burst impulse %d", entity->type); @@ -1549,7 +1601,7 @@ void applyDroneBurstImpulse(iwEnv *e, explosionCtx *ctx, const droneEntity *dron 
DEBUG_LOGF("walls used: %d magnitude: %f final: %f", wallsUsed, magnitude, magnitude / (float)wallsUsed); const b2Vec2 finalImpulse = b2MulSV(magnitude / (float)wallsUsed, b2Normalize(direction)); ASSERT(b2IsValidVec2(finalImpulse)); - droneApplyImpulse(e, drone, finalImpulse, drone->idx); + applyTrackedImpulse(e, drone->bodyID, drone->physicsTracking, finalImpulse, drone->idx); } void createExplosion(iwEnv *e, droneEntity *drone, const projectileEntity *projectile, const b2ExplosionDef *def) { @@ -1743,7 +1795,7 @@ void handleSuddenDeath(iwEnv *e) { .maskBits = WALL_SHAPE, }; if (isOverlappingAABB(e, drone->pos, DRONE_RADIUS, filter)) { - killDrone(e, drone); + killDrone(e, drone, NULL); } } @@ -1791,7 +1843,7 @@ void droneMove(const iwEnv *e, droneEntity *drone, b2Vec2 direction) { drone->lastMove = direction; } const b2Vec2 force = b2MulSV(DRONE_MOVE_MAGNITUDE, direction); - droneApplyForce(e, drone, force, drone->idx); + applyTrackedForce(e, drone->bodyID, drone->physicsTracking, force, drone->idx); } void droneShoot(iwEnv *e, droneEntity *drone, const b2Vec2 aim, const bool chargingWeapon) { @@ -1833,7 +1885,7 @@ void droneShoot(iwEnv *e, droneEntity *drone, const b2Vec2 aim, const bool charg } ASSERT_VEC_NORMALIZED(normAim); b2Vec2 recoil = b2MulSV(-drone->weaponInfo->recoilMagnitude, normAim); - droneApplyImpulse(e, drone, recoil, drone->idx); + applyTrackedImpulse(e, drone->bodyID, drone->physicsTracking, recoil, drone->idx); for (int i = 0; i < drone->weaponInfo->numProjectiles; i++) { createProjectile(e, drone, normAim); @@ -1934,6 +1986,7 @@ void droneBurst(iwEnv *e, droneEntity *drone) { b2ExplosionDef explosion = { .position = drone->pos, .radius = radius, + .falloff = 0.0f, .impulsePerLength = (DRONE_BURST_IMPACT_BASE * drone->burstCharge) + DRONE_BURST_IMPACT_MIN, .maskBits = WALL_SHAPE | FLOATING_WALL_SHAPE | PROJECTILE_SHAPE | DRONE_SHAPE, }; @@ -2074,13 +2127,11 @@ void handleBlackHolePull(iwEnv *e, projectileEntity *projectile) { case 
BOUNCY_WALL_ENTITY: case DEATH_WALL_ENTITY: wall = ent->entity; - bodyID = wall->bodyID; shapeID = wall->shapeID; rot = wall->rot; break; case DRONE_ENTITY: drone = ent->entity; - bodyID = drone->bodyID; shapeID = drone->shapeID; hasShield = drone->shield != NULL; break; @@ -2126,10 +2177,15 @@ void handleBlackHolePull(iwEnv *e, projectileEntity *projectile) { const b2Vec2 force = b2MulSV(magnitude, direction); if (entityTypeIsWall(ent->type)) { - b2Body_ApplyForce(bodyID, force, output.pointB, true); + wallEntity *wall = ent->entity; + if (wall->isFloating) { + applyTrackedForce(e, wall->bodyID, wall->physicsTracking, force, projectile->droneIdx); + } else { + b2Body_ApplyForce(wall->bodyID, force, output.pointB, true); + } } else if (ent->type == DRONE_ENTITY) { droneEntity *drone = ent->entity; - droneApplyForce(e, drone, force, projectile->droneIdx); + applyTrackedForce(e, drone->bodyID, drone->physicsTracking, force, projectile->droneIdx); } else { b2Body_ApplyForceToCenter(bodyID, force, true); } @@ -2313,7 +2369,7 @@ void handleBodyMoveEvents(iwEnv *e) { mapIdx = entityPosToCellIdx(e, newPos); if (mapIdx == -1) { DEBUG_LOGF("invalid position for drone: (%f, %f) killing it", newPos.x, newPos.y); - killDrone(e, drone); + killDrone(e, drone, NULL); continue; } drone->mapCellIdx = mapIdx; @@ -2367,9 +2423,22 @@ uint8_t handleProjectileBeginContact(iwEnv *e, const entity *proj, const entity // always allow all other projectiles to bounce off each other return false; - } else if (ent->type == BOUNCY_WALL_ENTITY) { - // always allow projectiles to bounce off bouncy walls - return false; + } else if (entityTypeIsWall(ent->type)) { + wallEntity *wall = ent->entity; + if (wall->isFloating && b2Contact_IsValid(contactID)) { + const b2Manifold manifold = b2Contact_GetData(contactID).manifold; + ASSERT(manifold.pointCount == 1); + b2Vec2 hitImpulse = b2MulSV(manifold.points[0].normalImpulse, manifold.normal); + if (!projIsShapeA) { + hitImpulse = b2Neg(hitImpulse); + } + 
applyTrackedImpulse(e, wall->bodyID, wall->physicsTracking, hitImpulse, projectile->droneIdx); + } + + if (ent->type == BOUNCY_WALL_ENTITY) { + // always allow projectiles to bounce off bouncy walls + return false; + } } else if (ent->type == SHIELD_ENTITY) { // always allow projectiles to bounce off shields, and update shield health shieldEntity *shield = ent->entity; @@ -2383,6 +2452,8 @@ uint8_t handleProjectileBeginContact(iwEnv *e, const entity *proj, const entity if (shield->health <= 0.0f) { droneEntity *parentDrone = safe_array_get_at(e->drones, projectile->droneIdx); droneAddEnergy(parentDrone, DRONE_SHIELD_BREAK_ENERGY_REFILL); + parentDrone->stepInfo.brokeShield[shield->drone->idx] = true; + e->stats[parentDrone->idx].shieldsBroken++; } return false; @@ -2393,6 +2464,7 @@ uint8_t handleProjectileBeginContact(iwEnv *e, const entity *proj, const entity } if (ent->type == DRONE_ENTITY) { droneEntity *hitDrone = ent->entity; + float hitStrength = 0.0f; if (b2Contact_IsValid(contactID)) { const b2Manifold manifold = b2Contact_GetData(contactID).manifold; ASSERT(manifold.pointCount == 1); @@ -2400,7 +2472,8 @@ uint8_t handleProjectileBeginContact(iwEnv *e, const entity *proj, const entity if (!projIsShapeA) { hitImpulse = b2Neg(hitImpulse); } - droneTrackImpulse(e, hitDrone, hitImpulse, projectile->droneIdx); + applyTrackedImpulse(e, hitDrone->bodyID, hitDrone->physicsTracking, hitImpulse, projectile->droneIdx); + hitStrength = b2AbsFloat(b2Length(hitImpulse)); } if (projectile->droneIdx != hitDrone->idx) { @@ -2410,12 +2483,11 @@ uint8_t handleProjectileBeginContact(iwEnv *e, const entity *proj, const entity const float impulseEnergy = projectile->lastSpeed * projectile->weaponInfo->mass * projectile->weaponInfo->energyRefillCoef; droneAddEnergy(shooterDrone, impulseEnergy); } - // add 1 so we can differentiate between no weapon and weapon 0 - shooterDrone->stepInfo.shotHit[hitDrone->idx] = projectile->weaponInfo->type + 1; + 
shooterDrone->stepInfo.shotHit[hitDrone->idx] += hitStrength; e->stats[shooterDrone->idx].shotsHit[projectile->weaponInfo->type]++; e->stats[shooterDrone->idx].totalShotsHit++; DEBUG_LOGF("drone %d hit drone %d with weapon %d", shooterDrone->idx, hitDrone->idx, projectile->weaponInfo->type); - hitDrone->stepInfo.shotTaken[shooterDrone->idx] = projectile->weaponInfo->type + 1; + hitDrone->stepInfo.shotTaken[shooterDrone->idx] += hitStrength; e->stats[hitDrone->idx].shotsTaken[projectile->weaponInfo->type]++; e->stats[hitDrone->idx].totalShotsTaken++; DEBUG_LOGF("drone %d hit by drone %d with weapon %d", hitDrone->idx, shooterDrone->idx, projectile->weaponInfo->type); @@ -2565,8 +2637,8 @@ void handleContactEvents(iwEnv *e) { const b2Manifold manifold = b2Contact_GetData(event->contactId).manifold; ASSERT(manifold.pointCount == 1); b2Vec2 hitImpulse = b2MulSV(manifold.points[0].normalImpulse, manifold.normal); - droneTrackImpulse(e, drone2, hitImpulse, drone1->idx); - droneTrackImpulse(e, drone1, b2Neg(hitImpulse), drone2->idx); + trackImpulse(e, drone2->physicsTracking, hitImpulse, drone1->idx); + trackImpulse(e, drone1->physicsTracking, b2Neg(hitImpulse), drone2->idx); } } @@ -2583,7 +2655,8 @@ void handleContactEvents(iwEnv *e) { if (e1->type == DEATH_WALL_ENTITY) { if (e2->type == DRONE_ENTITY) { droneEntity *drone = e2->entity; - killDrone(e, drone); + const wallEntity *wall = e1->entity; + killDrone(e, drone, wall); } else if (e2->type == SHIELD_ENTITY) { shieldEntity *shield = e2->entity; shield->health = 0.0f; @@ -2600,7 +2673,8 @@ void handleContactEvents(iwEnv *e) { if (e2->type == DEATH_WALL_ENTITY) { if (e1->type == DRONE_ENTITY) { droneEntity *drone = e1->entity; - killDrone(e, drone); + const wallEntity *wall = e2->entity; + killDrone(e, drone, wall); } else if (e1->type == SHIELD_ENTITY) { shieldEntity *shield = e1->entity; shield->health = 0.0f; diff --git a/pufferlib/ocean/impulse_wars/types.h b/pufferlib/ocean/impulse_wars/types.h index 
3fdb3a6d7..c193548f1 100644 --- a/pufferlib/ocean/impulse_wars/types.h +++ b/pufferlib/ocean/impulse_wars/types.h @@ -310,6 +310,8 @@ typedef struct droneStats { float totalBursts; float burstsHit; float energyEmptied; + float selfKills; + float kills; float wins; float shotsFired[_NUM_WEAPONS]; From d6b2eab08c31e8d1251fb28778d311be92c30477 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 15:28:23 +0000 Subject: [PATCH 047/188] Fix impulse wars --- pufferlib/ocean/impulse_wars/binding.c | 18 ++ pufferlib/ocean/impulse_wars/env.h | 134 +++++++---- pufferlib/ocean/impulse_wars/impulse_wars.py | 20 ++ pufferlib/ocean/impulse_wars/render.h | 17 +- pufferlib/ocean/impulse_wars/scripted_agent.h | 213 +++++++++++++----- pufferlib/ocean/impulse_wars/settings.h | 11 +- pufferlib/ocean/impulse_wars/types.h | 34 ++- 7 files changed, 325 insertions(+), 122 deletions(-) diff --git a/pufferlib/ocean/impulse_wars/binding.c b/pufferlib/ocean/impulse_wars/binding.c index 13d0880b5..28b429773 100644 --- a/pufferlib/ocean/impulse_wars/binding.c +++ b/pufferlib/ocean/impulse_wars/binding.c @@ -100,6 +100,21 @@ static int my_init(iwEnv *e, PyObject *args, PyObject *kwargs) { (bool)unpack(kwargs, "is_training"), (bool)unpack(kwargs, "continuous") ); + setRewards( + e, + (float)unpack(kwargs, "reward_win"), + (float)unpack(kwargs, "reward_self_kill"), + (float)unpack(kwargs, "reward_enemy_death"), + (float)unpack(kwargs, "reward_enemy_kill"), + 0.0f, // teammate death punishment + 0.0f, // teammate kill punishment + (float)unpack(kwargs, "reward_death"), + (float)unpack(kwargs, "reward_energy_emptied"), + (float)unpack(kwargs, "reward_weapon_pickup"), + (float)unpack(kwargs, "reward_shield_break"), + (float)unpack(kwargs, "reward_shot_hit_coef"), + (float)unpack(kwargs, "reward_explosion_hit_coef") + ); return 0; } @@ -131,8 +146,11 @@ static int my_log(PyObject *dict, Log *log) { assign_to_dict(dict, droneLog(buf, i, "total_bursts"), log->stats[i].totalBursts); 
assign_to_dict(dict, droneLog(buf, i, "bursts_hit"), log->stats[i].burstsHit); assign_to_dict(dict, droneLog(buf, i, "energy_emptied"), log->stats[i].energyEmptied); + assign_to_dict(dict, droneLog(buf, i, "shields_broken"), log->stats[i].shieldsBroken); + assign_to_dict(dict, droneLog(buf, i, "own_shield_broken"), log->stats[i].ownShieldBroken); assign_to_dict(dict, droneLog(buf, i, "self_kills"), log->stats[i].selfKills); assign_to_dict(dict, droneLog(buf, i, "kills"), log->stats[i].kills); + assign_to_dict(dict, droneLog(buf, i, "unknown_kills"), log->stats[i].unknownKills); assign_to_dict(dict, droneLog(buf, i, "wins"), log->stats[i].wins); // useful for debugging weapon balance, but really slows down diff --git a/pufferlib/ocean/impulse_wars/env.h b/pufferlib/ocean/impulse_wars/env.h index 33a220e8c..2162b5014 100644 --- a/pufferlib/ocean/impulse_wars/env.h +++ b/pufferlib/ocean/impulse_wars/env.h @@ -367,10 +367,10 @@ void computeObs(iwEnv *e) { continue; } - if (agentDrone->stepInfo.shotHit[i]) { + if (agentDrone->stepInfo.shotHit[i] != 0.0f) { hitShot = true; } - if (agentDrone->stepInfo.shotTaken[i]) { + if (agentDrone->stepInfo.shotTaken[i] != 0.0f) { tookShot = true; } @@ -539,6 +539,19 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui e->sittingDuck = sittingDuck; e->isTraining = isTraining; + e->winReward = WIN_REWARD; + e->selfKillPunishment = SELF_KILL_PUNISHMENT; + e->enemyDeathReward = ENEMY_DEATH_REWARD; + e->enemyKillReward = ENEMY_KILL_REWARD; + e->teammateDeathPunishment = TEAMMATE_DEATH_PUNISHMENT; + e->teammateKillPunishment = TEAMMATE_KILL_PUNISHMENT; + e->deathPunishment = DEATH_PUNISHMENT; + e->energyEmptiedPunishment = ENERGY_EMPTY_PUNISHMENT; + e->weaponPickupReward = WEAPON_PICKUP_REWARD; + e->shieldBreakReward = SHIELD_BREAK_REWARD; + e->shotHitRewardCoef = SHOT_HIT_REWARD_COEF; + e->explosionHitRewardCoef = EXPLOSION_HIT_REWARD_COEF; + e->obsBytes = obsBytes(e->numDrones); e->discreteObsBytes = 
alignedSize(discreteObsSize(e->numDrones) * sizeof(uint8_t), sizeof(float)); @@ -583,9 +596,28 @@ iwEnv *initEnv(iwEnv *e, uint8_t numDrones, uint8_t numAgents, int8_t mapIdx, ui e->humanDroneInput = 0; e->connectedControllers = 0; +#ifndef NDEBUG + create_array(&e->debugPoints, 4); +#endif + return e; } +void setRewards(iwEnv *e, float winReward, float selfKillPunishment, float enemyDeathReward, float enemyKillReward, float teammateDeathPunishment, float teammateKillPunishment, float deathPunishment, float energyEmptiedPunishment, float weaponPickupReward, float shieldBreakReward, float shotHitRewardCoef, float explosionHitRewardCoef) { + e->winReward = winReward; + e->selfKillPunishment = selfKillPunishment; + e->enemyDeathReward = enemyDeathReward; + e->enemyKillReward = enemyKillReward; + e->teammateDeathPunishment = teammateDeathPunishment; + e->teammateKillPunishment = teammateKillPunishment; + e->deathPunishment = deathPunishment; + e->energyEmptiedPunishment = energyEmptiedPunishment; + e->weaponPickupReward = weaponPickupReward; + e->shieldBreakReward = shieldBreakReward; + e->shotHitRewardCoef = shotHitRewardCoef; + e->explosionHitRewardCoef = explosionHitRewardCoef; +} + void clearEnv(iwEnv *e) { // rewards get cleared in stepEnv every step // memset(e->masks, 1, e->numAgents * sizeof(uint8_t)); @@ -673,6 +705,10 @@ void destroyEnv(iwEnv *e) { cc_array_destroy(e->dronePieces); b2DestroyWorld(e->worldID); + +#ifndef NDEBUG + cc_array_destroy(e->debugPoints); +#endif } void resetEnv(iwEnv *e) { @@ -680,21 +716,11 @@ void resetEnv(iwEnv *e) { setupEnv(e); } -float computeShotReward(const droneEntity *drone, const weaponInformation *weaponInfo) { - const float weaponForce = weaponInfo->fireMagnitude * weaponInfo->invMass; - const float scaledForce = (weaponForce * (weaponForce * SHOT_HIT_REWARD_COEF)) + 0.25f; - return scaledForce + computeHitStrength(drone); -} - -float computeExplosionReward(const droneEntity *drone) { - return computeHitStrength(drone) * 
EXPLOSION_HIT_REWARD_COEF; -} - float computeReward(iwEnv *e, droneEntity *drone) { float reward = 0.0f; if (drone->energyFullyDepleted && drone->energyRefillWait == DRONE_ENERGY_REFILL_EMPTY_WAIT) { - reward += ENERGY_EMPTY_PUNISHMENT; + reward += e->energyEmptiedPunishment; } // only reward picking up a weapon if the standard weapon was @@ -702,7 +728,7 @@ float computeReward(iwEnv *e, droneEntity *drone) { // weapon, but other weapons are situational better so don't // reward switching a non-standard weapon if (drone->stepInfo.pickedUpWeapon && drone->stepInfo.prevWeapon == STANDARD_WEAPON) { - reward += WEAPON_PICKUP_REWARD; + reward += e->weaponPickupReward; } for (uint8_t i = 0; i < e->numDrones; i++) { @@ -712,51 +738,51 @@ float computeReward(iwEnv *e, droneEntity *drone) { droneEntity *enemyDrone = safe_array_get_at(e->drones, i); const bool onTeam = drone->team == enemyDrone->team; - if (drone->stepInfo.shotHit[i] != 0 && !onTeam) { - // subtract 1 from the weapon type because 1 is added so we - // can use 0 as no shot was hit - const weaponInformation *weaponInfo = weaponInfos[drone->stepInfo.shotHit[i] - 1]; - reward += computeShotReward(enemyDrone, weaponInfo); + // TODO: punish for hitting teammates? 
+ if (drone->stepInfo.shotHit[i] != 0.0f && !onTeam) { + reward += drone->stepInfo.shotHit[i] * e->shotHitRewardCoef; } - if (drone->stepInfo.explosionHit[i] && !onTeam) { - reward += computeExplosionReward(enemyDrone); + if (drone->stepInfo.explosionHit[i] != 0.0f && !onTeam) { + reward += drone->stepInfo.explosionHit[i] * e->explosionHitRewardCoef; + } + if (drone->stepInfo.brokeShield[i] && !onTeam) { + reward += e->shieldBreakReward; } if (e->numAgents == e->numDrones) { - if (drone->stepInfo.shotTaken[i] != 0 && !onTeam) { - const weaponInformation *weaponInfo = weaponInfos[drone->stepInfo.shotTaken[i] - 1]; - reward -= computeShotReward(drone, weaponInfo) * 0.5f; + if (drone->stepInfo.shotTaken[i] != 0) { + reward -= drone->stepInfo.shotTaken[i] * e->shotHitRewardCoef; } - if (drone->stepInfo.explosionTaken[i] && !onTeam) { - reward -= computeExplosionReward(drone) * 0.5f; + if (drone->stepInfo.explosionTaken[i]) { + reward -= drone->stepInfo.explosionTaken[i] * e->explosionHitRewardCoef; } } if (enemyDrone->dead && enemyDrone->diedThisStep) { if (!onTeam) { - reward += ENEMY_DEATH_REWARD; + reward += e->enemyDeathReward; if (drone->killed[i]) { - reward += ENEMY_KILL_REWARD; + reward += e->enemyKillReward; } } else { - reward += TEAMMATE_DEATH_PUNISHMENT; + reward += e->teammateDeathPunishment; if (drone->killed[i]) { - reward += TEAMMATE_KILL_PUNISHMENT; + reward += e->teammateKillPunishment; } } continue; } - const b2Vec2 enemyDirection = b2Normalize(b2Sub(enemyDrone->pos, drone->pos)); - const float velocityToEnemy = b2Dot(drone->lastVelocity, enemyDirection); - const float enemyDistance = b2Distance(enemyDrone->pos, drone->pos); - // stop rewarding approaching an enemy if they're very close - // to avoid constant clashing; always reward approaching when - // the current weapon is the shotgun, it greatly benefits from - // being close to enemies - if (velocityToEnemy > 0.1f && (drone->weaponInfo->type == SHOTGUN_WEAPON || enemyDistance > DISTANCE_CUTOFF)) 
{ - reward += APPROACH_REWARD; - } + // const b2Vec2 enemyDirection = b2Normalize(b2Sub(enemyDrone->pos, drone->pos)); + // const float velocityToEnemy = b2Dot(drone->lastVelocity, enemyDirection); + // const float enemyDistance = b2Distance(enemyDrone->pos, drone->pos); + // // stop rewarding approaching an enemy if they're very close + // // to avoid constant clashing; always reward approaching when + // // the current weapon is the shotgun, it greatly benefits from + // // being close to enemies + // if (velocityToEnemy > 0.1f && (drone->weaponInfo->type == SHOTGUN_WEAPON || enemyDistance > DISTANCE_CUTOFF)) { + // reward += APPROACH_REWARD; + // } } return reward; @@ -766,21 +792,19 @@ const float REWARD_EPS = 1.0e-6f; void computeRewards(iwEnv *e, const bool roundOver, const int8_t winner, const int8_t winningTeam) { if (roundOver && winner != -1 && winner < e->numAgents) { - e->rewards[winner] += WIN_REWARD; + e->rewards[winner] += e->winReward; } for (uint8_t i = 0; i < e->numDrones; i++) { float reward = 0.0f; droneEntity *drone = safe_array_get_at(e->drones, i); - if (!drone->dead) { - reward = computeReward(e, drone); - if (roundOver && winningTeam == drone->team) { - reward += WIN_REWARD; - } + reward = computeReward(e, drone); + if (!drone->dead && roundOver && winningTeam == drone->team) { + reward += e->winReward; } else if (drone->diedThisStep) { - reward = DEATH_PUNISHMENT; + reward = e->deathPunishment; if (drone->killedBy == drone->idx) { - reward += SELF_KILL_PUNISHMENT; + reward += e->selfKillPunishment; } } if (i < e->numAgents) { @@ -985,6 +1009,11 @@ void addLog(iwEnv *e, Log *log) { e->log.stats[j].totalBursts += log->stats[j].totalBursts; e->log.stats[j].burstsHit += log->stats[j].burstsHit; e->log.stats[j].energyEmptied += log->stats[j].energyEmptied; + e->log.stats[j].shieldsBroken += log->stats[j].shieldsBroken; + e->log.stats[j].ownShieldBroken += log->stats[j].ownShieldBroken; + e->log.stats[j].selfKills += log->stats[j].selfKills; + 
e->log.stats[j].kills += log->stats[j].kills; + e->log.stats[j].unknownKills += log->stats[j].unknownKills; for (uint8_t k = 0; k < NUM_WEAPONS; k++) { e->log.stats[j].shotsFired[k] += log->stats[j].shotsFired[k]; @@ -1006,6 +1035,7 @@ void addLog(iwEnv *e, Log *log) { e->log.n += 1.0f; } +// TODO: 2nd agent doesn't seem to work right void stepEnv(iwEnv *e) { if (e->needsReset) { DEBUG_LOG("Resetting environment"); @@ -1017,6 +1047,14 @@ void stepEnv(iwEnv *e) { #endif } +#ifndef NDEBUG + for (uint8_t i = 0; i < cc_array_size(e->debugPoints); i++) { + debugPoint *point = safe_array_get_at(e->debugPoints, i); + fastFree(point); + } + cc_array_remove_all(e->debugPoints); +#endif + agentActions stepActions[e->numDrones]; memset(stepActions, 0x0, e->numDrones * sizeof(agentActions)); diff --git a/pufferlib/ocean/impulse_wars/impulse_wars.py b/pufferlib/ocean/impulse_wars/impulse_wars.py index 0f958b8dc..6fc2f5d27 100644 --- a/pufferlib/ocean/impulse_wars/impulse_wars.py +++ b/pufferlib/ocean/impulse_wars/impulse_wars.py @@ -28,6 +28,16 @@ def __init__( continuous: bool = False, is_training: bool = True, human_control: bool = False, + reward_win: float = 2.0, + reward_self_kill: float = -1.0, + reward_enemy_death: float = 1.0, + reward_enemy_kill: float = 1.0, + reward_death: float = -0.25, + reward_energy_emptied: float = -0.75, + reward_weapon_pickup: float = 0.5, + reward_shield_break: float = 0.5, + reward_shot_hit_coef: float = 0.005, + reward_explosion_hit_coef: float = 0.005, seed: int = 0, render: bool = False, report_interval: int = 64, @@ -98,6 +108,16 @@ def __init__( sitting_duck=sitting_duck, is_training=is_training, continuous=continuous, + reward_win=reward_win, + reward_self_kill=reward_self_kill, + reward_enemy_death=reward_enemy_death, + reward_enemy_kill=reward_enemy_kill, + reward_death=reward_death, + reward_energy_emptied=reward_energy_emptied, + reward_weapon_pickup=reward_weapon_pickup, + reward_shield_break=reward_shield_break, + 
reward_shot_hit_coef=reward_shot_hit_coef, + reward_explosion_hit_coef=reward_explosion_hit_coef, ) binding.shared(self.c_envs) diff --git a/pufferlib/ocean/impulse_wars/render.h b/pufferlib/ocean/impulse_wars/render.h index ca7b94c32..5eee86103 100644 --- a/pufferlib/ocean/impulse_wars/render.h +++ b/pufferlib/ocean/impulse_wars/render.h @@ -856,6 +856,7 @@ void renderUI(const iwEnv *e, const bool starting) { renderTimer(e, timerStr, PUFF_WHITE); } +// TODO: track when trails begine and end (ie when respawning) void renderBrakeTrails(iwEnv *e, const droneEntity *drone) { const float maxLifetime = 3.0f * e->frameRate; const float maxAlpha = 0.33f; @@ -923,7 +924,7 @@ void renderBrakeTrails(iwEnv *e, const droneEntity *drone) { } } -// TODO: improve +// TODO: make 2D circles void renderExplosions(const iwEnv *e) { const uint16_t maxRenderSteps = EXPLOSION_TIME * e->frameRate; @@ -940,7 +941,7 @@ void renderExplosions(const iwEnv *e) { continue; } - // color bursts with a bit of the parent drone's color' + // color bursts with a bit of the parent drone's color const float alpha = (float)explosion->renderSteps / maxRenderSteps; BeginBlendMode(BLEND_ALPHA); if (false && explosion->isBurst) { @@ -1658,9 +1659,6 @@ void _renderEnv(iwEnv *e, const bool starting, const bool ending, const int8_t w BeginBlendMode(BLEND_ALPHA); for (uint8_t i = 0; i < cc_array_size(e->drones); i++) { const droneEntity *drone = safe_array_get_at(e->drones, i); - if (drone->dead) { - continue; - } renderBrakeTrails(e, drone); } EndBlendMode(); @@ -1726,10 +1724,13 @@ void _renderEnv(iwEnv *e, const bool starting, const bool ending, const int8_t w renderDroneUI(drone); } - if (!b2VecEqual(e->debugPoint, b2Vec2_zero)) { - const Vector2 pos = {.x = e->debugPoint.x, .y = e->debugPoint.y}; - DrawCircleV(pos, DRONE_RADIUS * 0.5f, WHITE); +#ifndef NDEBUG + for (uint8_t i = 0; i < cc_array_size(e->debugPoints); i++) { + debugPoint *point = safe_array_get_at(e->debugPoints, i); + const Vector2 pos = {.x 
= point->pos.x, .y = point->pos.y}; + DrawCircleV(pos, point->size, point->color); } +#endif EndMode2D(); diff --git a/pufferlib/ocean/impulse_wars/scripted_agent.h b/pufferlib/ocean/impulse_wars/scripted_agent.h index eaa4c096f..fa0820722 100644 --- a/pufferlib/ocean/impulse_wars/scripted_agent.h +++ b/pufferlib/ocean/impulse_wars/scripted_agent.h @@ -10,15 +10,31 @@ const uint8_t NUM_NEAR_PICKUPS = 1; const float WALL_CHECK_DISTANCE_SQUARED = SQUARED(6.0f); const float WALL_AVOID_DISTANCE = 4.0f; const float WALL_DANGER_DISTANCE = 3.0f; -const float WALL_BRAKE_DISTANCE = 20.0f; -const float WALL_BRAKE_SPEED = 12.5f; -const float WALL_BRAKE_TIME = 0.5f; +const float WALL_BURST_CHECK_DISTANCE = 5.0f; +const float WALL_BURST_CHECK_SPEED = 20.0f; const float BURST_MIN_RADIUS_SQUARED = SQUARED(DRONE_BURST_RADIUS_MIN); -const float MOVE_SPEED_SQUARED = SQUARED(5.0f); +const float STABILIZE_MOVE_SPEED = 5.0f; +const float SHOTGUN_DANGER_DISTANCE = 4.0f; + +void addDebugPoint(iwEnv *e, b2Vec2 pos, float size, Color color) { +#ifndef NDEBUG + debugPoint *point = fastCalloc(1, sizeof(debugPoint)); + point->pos = pos; + point->size = size; + point->color = color; + cc_array_add(e->debugPoints, point); +#else + MAYBE_UNUSED(e); + MAYBE_UNUSED(pos); + MAYBE_UNUSED(size); + MAYBE_UNUSED(color); +#endif +} typedef struct castCircleCtx { bool hit; b2ShapeId shapeID; + b2Vec2 point; } castCircleCtx; float castCircleCallback(b2ShapeId shapeId, b2Vec2 point, b2Vec2 normal, float fraction, void *context) { @@ -33,6 +49,7 @@ float castCircleCallback(b2ShapeId shapeId, b2Vec2 point, b2Vec2 normal, float f castCircleCtx *ctx = context; ctx->hit = true; ctx->shapeID = shapeId; + ctx->point = point; return 0.0f; } @@ -122,45 +139,70 @@ void pathfindBFS(const iwEnv *e, uint8_t *flatPaths, uint16_t destCellIdx) { } } -float distanceWithDamping(const iwEnv *e, const droneEntity *drone, const b2Vec2 direction, const float linearDamping, const float steps) { - float speed = 
drone->weaponInfo->recoilMagnitude * DRONE_INV_MASS; - if (!b2VecEqual(drone->velocity, b2Vec2_zero)) { - speed = b2Length(b2MulAdd(drone->velocity, speed, direction)); - } - - const float damping = 1.0f + linearDamping * e->deltaTime; +float distanceWithDamping(const iwEnv *e, const float speed, const float linearDamping, const float steps) { + const float damping = 1.0f + (linearDamping * e->deltaTime); return speed * (damping / linearDamping) * (1.0f - powf(1.0f / damping, steps)); } +b2Vec2 positionWithDamping(const iwEnv *e, const droneEntity *drone, const b2Vec2 impulse, const float linearDamping, const float steps) { + const b2Vec2 newVel = b2Add(drone->velocity, impulse); + const float speed = b2Length(newVel); + const float distance = distanceWithDamping(e, speed, linearDamping, steps); + const b2Vec2 direction = b2Normalize(newVel); + return b2MulAdd(drone->pos, distance, direction); +} + +// returns true if drone can fire in the given direction without hitting +// or getting too close to a death wall before it can fire again bool safeToFire(iwEnv *e, const droneEntity *drone, const b2Vec2 direction) { + // don't shoot shotgun point blank at walls, the shots will immediately + // bounce back and send the drone flying uncontrollably + if (drone->weaponInfo->type == SHOTGUN_WEAPON) { + const b2Vec2 rayEnd = b2MulAdd(drone->pos, SHOTGUN_DANGER_DISTANCE, direction); + const b2Vec2 translation = b2Sub(rayEnd, drone->pos); + const b2QueryFilter filter = {.categoryBits = PROJECTILE_SHAPE, .maskBits = WALL_SHAPE | FLOATING_WALL_SHAPE}; + const b2RayResult rayRes = b2World_CastRayClosest(e->worldID, drone->pos, translation, filter); + if (rayRes.hit) { + return false; + } + } + float shotWait; if (drone->ammo > 1) { shotWait = ((drone->weaponInfo->coolDown + drone->weaponInfo->charge) / e->deltaTime) * 1.5f; } else { shotWait = ((e->defaultWeapon->coolDown + e->defaultWeapon->charge) / e->deltaTime) * 1.5f; } - const b2Vec2 invDirection = b2MulSV(-1.0f, 
direction); - const float recoilDistance = distanceWithDamping(e, drone, invDirection, DRONE_LINEAR_DAMPING, shotWait); + const float recoilSpeed = -drone->weaponInfo->recoilMagnitude * DRONE_INV_MASS; + const b2Vec2 recoil = b2MulSV(recoilSpeed, direction); + const b2Vec2 recoilPos = positionWithDamping(e, drone, recoil, DRONE_LINEAR_DAMPING, shotWait); - // e->debugPoint = b2MulAdd(drone->pos, recoilDistance, invDirection); + addDebugPoint(e, b2MulAdd(drone->pos, 2.0f * DRONE_RADIUS, direction), 0.5f, WHITE); const b2Vec2 pos = drone->pos; - const b2Vec2 rayEnd = b2MulAdd(pos, recoilDistance, invDirection); + const b2Vec2 rayEnd = recoilPos; const b2Vec2 translation = b2Sub(rayEnd, pos); - const b2ShapeProxy cirProxy = b2MakeProxy(&pos, 1, DRONE_RADIUS); + float radius = DRONE_RADIUS; + if (drone->shield != NULL) { + radius = DRONE_SHIELD_RADIUS; + } + const b2ShapeProxy cirProxy = b2MakeProxy(&pos, 1, radius); const b2QueryFilter filter = {.categoryBits = DRONE_SHAPE, .maskBits = WALL_SHAPE | FLOATING_WALL_SHAPE | DRONE_SHAPE}; castCircleCtx ctx = {0}; b2World_CastShape(e->worldID, &cirProxy, translation, filter, castCircleCallback, &ctx); if (!ctx.hit) { + addDebugPoint(e, recoilPos, 0.5f, LIME); return true; } else { const entity *ent = b2Shape_GetUserData(ctx.shapeID); - if (ent->type == STANDARD_WALL_ENTITY || ent->type == BOUNCY_WALL_ENTITY || ent->type == DRONE_ENTITY) { + if (entityTypeIsWall(ent->type) && ent->type != DEATH_WALL_ENTITY) { + addDebugPoint(e, recoilPos, 0.5f, LIME); return true; } } + addDebugPoint(e, recoilPos, 0.5f, MAROON); return false; } @@ -168,6 +210,8 @@ bool weaponSafeForMovement(const droneEntity *drone) { switch (drone->weaponInfo->type) { case IMPLODER_WEAPON: case MINE_LAUNCHER_WEAPON: + case BLACK_HOLE_WEAPON: + case NUKE_WEAPON: return false; default: return true; @@ -236,6 +280,11 @@ float weaponIdealRangeSquared(const droneEntity *drone) { } bool shouldShootAtEnemy(iwEnv *e, const droneEntity *drone, const droneEntity 
*enemyDrone, const b2Vec2 enemyDroneDirection) { + // don't shoot at shielded enemies with railguns since it will likely + // bounce back and hit us + if (drone->weaponInfo->type == SNIPER_WEAPON && enemyDrone->shield != NULL) { + return false; + } if (!safeToFire(e, drone, enemyDroneDirection)) { return false; } @@ -257,6 +306,10 @@ bool shouldShootAtEnemy(iwEnv *e, const droneEntity *drone, const droneEntity *e if (ent == NULL || ent->type != DRONE_ENTITY) { return false; } + const droneEntity *hitDrone = ent->entity; + if (hitDrone->idx != enemyDrone->idx) { + return false; + } return true; } @@ -267,42 +320,42 @@ b2Vec2 predictiveAim(const droneEntity *drone, const droneEntity *enemyDrone, co return b2Normalize(b2Sub(predictedPos, drone->pos)); } -void handleWallProximity(iwEnv *e, const droneEntity *drone, const wallEntity *wall, const float distance, agentActions *actions) { - if (distance > WALL_BRAKE_DISTANCE) { +void scriptedAgentBurst(const droneEntity *drone, agentActions *actions) { + if (drone->chargingBurst) { return; + } else { + actions->chargingBurst = true; } +} +void handleWallProximity(iwEnv *e, const droneEntity *drone, const wallEntity *wall, const float distance, agentActions *actions) { + // shoot to move away faster from a death wall if we're too close and it's safe const b2Vec2 wallDirection = b2Normalize(b2Sub(wall->pos, drone->pos)); - const float speedToWall = b2Dot(drone->velocity, wallDirection); - if (speedToWall > WALL_BRAKE_SPEED) { - float damping = DRONE_LINEAR_DAMPING; - if (drone->braking) { - damping *= DRONE_BRAKE_DAMPING_COEF; - } - const float travelDistance = distanceWithDamping(e, drone, wallDirection, damping, WALL_BRAKE_TIME / e->deltaTime); - if (travelDistance >= distance) { - actions->brake = true; - } + if (distance <= WALL_DANGER_DISTANCE && weaponSafeForMovement(drone) && safeToFire(e, drone, wallDirection)) { + actions->aim = b2MulAdd(actions->aim, distance, wallDirection); + scriptedAgentShoot(drone, actions); } 
- if (actions->brake || distance <= WALL_AVOID_DISTANCE) { - const b2Vec2 invWallDirection = b2MulSV(-1.0f, wallDirection); - actions->move = b2MulAdd(actions->move, distance, invWallDirection); + // move away from the wall if we're too close + if (distance <= WALL_AVOID_DISTANCE) { + actions->move = b2MulAdd(actions->move, -distance, wallDirection); } +} - if (distance > WALL_DANGER_DISTANCE) { +// charge burst until we're close enough to a death wall to burst off +// of it +void wallBurst(iwEnv *e, const droneEntity *drone, const float speed, const float distance, agentActions *actions) { + if (distance < DRONE_BURST_RADIUS_MIN) { + scriptedAgentBurst(drone, actions); return; } - // shoot to move away faster from a death wall if we're too close and it's safe - if (weaponSafeForMovement(drone) && safeToFire(e, drone, wallDirection)) { - actions->aim = wallDirection; - scriptedAgentShoot(drone, actions); + float damping = DRONE_LINEAR_DAMPING; + if (drone->braking || actions->brake) { + damping *= DRONE_BRAKE_DAMPING_COEF; } -} - -void scriptedAgentBurst(const droneEntity *drone, agentActions *actions) { - if (drone->chargingBurst) { - return; + const float travelDistance = distanceWithDamping(e, b2Length(drone->velocity), damping, e->frameSkip); + if (travelDistance > distance) { + scriptedAgentBurst(drone, actions); } else { actions->chargingBurst = true; } @@ -314,17 +367,18 @@ agentActions scriptedAgentActions(iwEnv *e, droneEntity *drone) { return actions; } - // keep the weapon charged and ready if it needs it + // keep the weapon charged and ready if (drone->weaponInfo->charge != 0.0f) { actions.chargingWeapon = true; actions.shoot = true; } - // find the nearest death wall or floating wall + // find the N nearest death walls or floating walls nearEntity nearWalls[MAX_NEAREST_WALLS] = {0}; findNearWalls(e, drone, nearWalls, NUM_NEAR_WALLS); - // find the distance between the closest points on the drone and the nearest wall + // move away from and shoot at 
death walls if we're too close + float closestWallDistance = FLT_MAX; for (uint8_t i = 0; i < NUM_NEAR_WALLS; i++) { const wallEntity *wall = nearWalls[i].entity; if (wall->type != DEATH_WALL_ENTITY) { @@ -332,6 +386,7 @@ agentActions scriptedAgentActions(iwEnv *e, droneEntity *drone) { } const b2DistanceOutput output = closestPoint(drone->ent, wall->ent); + closestWallDistance = min(closestWallDistance, output.distance); handleWallProximity(e, drone, wall, output.distance, &actions); } @@ -345,9 +400,60 @@ agentActions scriptedAgentActions(iwEnv *e, droneEntity *drone) { } const b2DistanceOutput output = closestPoint(drone->ent, floatingWall->ent); + closestWallDistance = min(closestWallDistance, output.distance); handleWallProximity(e, drone, floatingWall, output.distance, &actions); } + // TODO: shoot mines around enemy, only if not too close + + // if we are moving towards a death wall at a high speed, do everything + // possible to avoid hitting it + const float droneSpeed = b2Length(drone->velocity); + if (drone->braking || drone->chargingBurst || closestWallDistance <= WALL_BURST_CHECK_DISTANCE || droneSpeed >= WALL_BURST_CHECK_SPEED) { + float damping = DRONE_LINEAR_DAMPING; + if (drone->braking || actions.brake) { + damping *= DRONE_BRAKE_DAMPING_COEF; + } + const b2Vec2 recoilPos = positionWithDamping(e, drone, b2Vec2_zero, damping, 0.5f / e->deltaTime); + const b2Vec2 pos = drone->pos; + const b2Vec2 rayEnd = recoilPos; + const b2Vec2 translation = b2Sub(rayEnd, pos); + float radius = DRONE_RADIUS; + if (drone->shield != NULL) { + radius = DRONE_SHIELD_RADIUS; + } + const b2ShapeProxy cirProxy = b2MakeProxy(&pos, 1, radius); + const b2QueryFilter filter = {.categoryBits = DRONE_SHAPE, .maskBits = WALL_SHAPE | FLOATING_WALL_SHAPE}; + + castCircleCtx ctx = {0}; + b2World_CastShape(e->worldID, &cirProxy, translation, filter, castCircleCallback, &ctx); + if (ctx.hit) { + const entity *ent = b2Shape_GetUserData(ctx.shapeID); + if (entityTypeIsWall(ent->type) 
&& ent->type == DEATH_WALL_ENTITY) { + actions.brake = true; + if (drone->shield == NULL) { + wallBurst(e, drone, droneSpeed, b2Distance(drone->pos, ctx.point), &actions); + } + + const b2Vec2 droneDirection = b2Normalize(drone->velocity); + if (b2VecEqual(actions.move, b2Vec2_zero)) { + actions.move = b2MulSV(-1.0f, droneDirection); + } + if (b2VecEqual(actions.aim, b2Vec2_zero) && weaponSafeForMovement(drone)) { + actions.aim = droneDirection; + scriptedAgentShoot(drone, &actions); + } + return actions; + } + } + } + + // if we're close enough to a wall to need to shoot at it, don't + // worry about enemies + if (!b2VecEqual(actions.aim, b2Vec2_zero)) { + return actions; + } + // get a weapon if the standard weapon is active if (drone->weaponInfo->type == STANDARD_WEAPON && cc_array_size(e->pickups) != 0) { nearEntity nearPickups[MAX_WEAPON_PICKUPS] = {0}; @@ -389,12 +495,10 @@ agentActions scriptedAgentActions(iwEnv *e, droneEntity *drone) { } } if (enemyDrone == NULL) { - return actions; - } - - // if we're close enough to a wall to need to shoot at it, don't - // worry about enemies - if (!b2VecEqual(actions.aim, b2Vec2_zero)) { + // fight recoil if we're not otherwise moving + if (b2VecEqual(actions.move, b2Vec2_zero) && droneSpeed >= STABILIZE_MOVE_SPEED) { + actions.move = b2MulSV(-1.0f, b2Normalize(drone->velocity)); + } return actions; } @@ -423,11 +527,8 @@ agentActions scriptedAgentActions(iwEnv *e, droneEntity *drone) { } // fight recoil if we're not otherwise moving - if (b2VecEqual(actions.move, b2Vec2_zero)) { - const float speedSquared = b2LengthSquared(drone->velocity); - if (speedSquared > MOVE_SPEED_SQUARED) { - actions.move = b2MulSV(-1.0f, b2Normalize(drone->velocity)); - } + if (b2VecEqual(actions.move, b2Vec2_zero) && droneSpeed >= STABILIZE_MOVE_SPEED) { + actions.move = b2MulSV(-1.0f, b2Normalize(drone->velocity)); } return actions; diff --git a/pufferlib/ocean/impulse_wars/settings.h b/pufferlib/ocean/impulse_wars/settings.h index 
3a20f4652..f79222415 100644 --- a/pufferlib/ocean/impulse_wars/settings.h +++ b/pufferlib/ocean/impulse_wars/settings.h @@ -24,7 +24,7 @@ const uint8_t NUM_MAPS = 9; #define MAX_NEAREST_WALLS 8 -const uint8_t DRONE_LIVES = 1; +const uint8_t DRONE_LIVES = 3; const float DRONE_RESPAWN_WAIT = 2.0f; const uint8_t ROUND_STEPS = 90; const uint8_t SUDDEN_DEATH_STEPS = 5; @@ -34,8 +34,8 @@ const uint8_t MAX_DRONES = _MAX_DRONES; const uint16_t LOG_BUFFER_SIZE = 1024; // reward settings -const float WIN_REWARD = 1.5f; -const float SELF_KILL_PUNISHMENT = -2.0f; +const float WIN_REWARD = 2.0f; +const float SELF_KILL_PUNISHMENT = -1.0f; const float ENEMY_DEATH_REWARD = 1.0f; const float ENEMY_KILL_REWARD = 1.0f; const float TEAMMATE_DEATH_PUNISHMENT = -0.5f; @@ -43,8 +43,9 @@ const float TEAMMATE_KILL_PUNISHMENT = -1.0f; const float DEATH_PUNISHMENT = 0.0f; const float ENERGY_EMPTY_PUNISHMENT = -0.75f; const float WEAPON_PICKUP_REWARD = 0.5f; -const float SHOT_HIT_REWARD_COEF = 0.000013333f; -const float EXPLOSION_HIT_REWARD_COEF = 5.0f; +const float SHIELD_BREAK_REWARD = 0.5f; +const float SHOT_HIT_REWARD_COEF = 0.005f; +const float EXPLOSION_HIT_REWARD_COEF = 0.005f; const float APPROACH_REWARD = 0.0f; // approach reward doesn't apply within the cutoff to avoid constant clashing diff --git a/pufferlib/ocean/impulse_wars/types.h b/pufferlib/ocean/impulse_wars/types.h index c193548f1..6df5014e9 100644 --- a/pufferlib/ocean/impulse_wars/types.h +++ b/pufferlib/ocean/impulse_wars/types.h @@ -119,6 +119,7 @@ typedef struct wallEntity { bool isFloating; enum entityType type; bool isSuddenDeath; + CC_Array *physicsTracking; entity *ent; } wallEntity; @@ -210,10 +211,11 @@ typedef struct droneStepInfo { bool firedShot; bool pickedUpWeapon; enum weaponType prevWeapon; - uint8_t shotHit[_MAX_DRONES]; - bool explosionHit[_MAX_DRONES]; - uint8_t shotTaken[_MAX_DRONES]; - bool explosionTaken[_MAX_DRONES]; + float shotHit[_MAX_DRONES]; + float explosionHit[_MAX_DRONES]; + float 
shotTaken[_MAX_DRONES]; + float explosionTaken[_MAX_DRONES]; + bool brokeShield[_MAX_DRONES]; bool ownShotTaken; } droneStepInfo; @@ -310,8 +312,11 @@ typedef struct droneStats { float totalBursts; float burstsHit; float energyEmptied; + float shieldsBroken; + float ownShieldBroken; float selfKills; float kills; + float unknownKills; float wins; float shotsFired[_NUM_WEAPONS]; @@ -400,6 +405,12 @@ typedef struct pathingInfo { int8_t *pathBuffer; } pathingInfo; +typedef struct debugPoint { + b2Vec2 pos; + float size; + Color color; +} debugPoint; + typedef struct iwEnv { uint8_t numDrones; uint8_t numAgents; @@ -408,6 +419,19 @@ typedef struct iwEnv { bool sittingDuck; bool isTraining; + float winReward; + float selfKillPunishment; + float enemyDeathReward; + float enemyKillReward; + float teammateDeathPunishment; + float teammateKillPunishment; + float deathPunishment; + float energyEmptiedPunishment; + float weaponPickupReward; + float shieldBreakReward; + float shotHitRewardCoef; + float explosionHitRewardCoef; + uint16_t obsBytes; uint16_t discreteObsBytes; bool continuousActions; @@ -468,7 +492,7 @@ typedef struct iwEnv { rayClient *client; float renderScale; CC_Array *explosions; - b2Vec2 debugPoint; + CC_Array *debugPoints; } iwEnv; #endif From c42db1c3c2ad047072776ca6b260e2bd0f00d247 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 17:47:41 +0000 Subject: [PATCH 048/188] IW sweep --- pufferlib/config/ocean/impulse_wars.ini | 2 +- pufferlib/ocean/impulse_wars/scripted_agent.h | 4 ++-- pufferlib/ocean/torch.py | 21 +++++++++---------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pufferlib/config/ocean/impulse_wars.ini b/pufferlib/config/ocean/impulse_wars.ini index ddb8d3de5..36215fadb 100644 --- a/pufferlib/config/ocean/impulse_wars.ini +++ b/pufferlib/config/ocean/impulse_wars.ini @@ -6,8 +6,8 @@ rnn_name = ImpulseWarsLSTM max_suggestion_cost = 10_800 [policy] -cnn_channels = 64 hidden_size = 512 +cnn_channels = 64 # 
These must match what's set in env below continuous = False diff --git a/pufferlib/ocean/impulse_wars/scripted_agent.h b/pufferlib/ocean/impulse_wars/scripted_agent.h index fa0820722..d32fd500f 100644 --- a/pufferlib/ocean/impulse_wars/scripted_agent.h +++ b/pufferlib/ocean/impulse_wars/scripted_agent.h @@ -343,7 +343,7 @@ void handleWallProximity(iwEnv *e, const droneEntity *drone, const wallEntity *w // charge burst until we're close enough to a death wall to burst off // of it -void wallBurst(iwEnv *e, const droneEntity *drone, const float speed, const float distance, agentActions *actions) { +void wallBurst(iwEnv *e, const droneEntity *drone, const float distance, agentActions *actions) { if (distance < DRONE_BURST_RADIUS_MIN) { scriptedAgentBurst(drone, actions); return; @@ -432,7 +432,7 @@ agentActions scriptedAgentActions(iwEnv *e, droneEntity *drone) { if (entityTypeIsWall(ent->type) && ent->type == DEATH_WALL_ENTITY) { actions.brake = true; if (drone->shield == NULL) { - wallBurst(e, drone, droneSpeed, b2Distance(drone->pos, ctx.point), &actions); + wallBurst(e, drone, b2Distance(drone->pos, ctx.point), &actions); } const b2Vec2 droneDirection = b2Normalize(drone->velocity); diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index 8990563c2..fdfbbbdf0 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -53,11 +53,11 @@ def decode_actions(self, flat_hidden, state=None): return action, value class NMMO3LSTM(pufferlib.models.LSTMWrapper): - def __init__(self, env, policy, hidden_size=512): - super().__init__(env, policy, hidden_size) + def __init__(self, env, policy, input_size=512, hidden_size=512): + super().__init__(env, policy, input_size, hidden_size) class NMMO3(nn.Module): - def __init__(self, env, hidden_size=512, **kwargs): + def __init__(self, env, hidden_size=512, output_size=512, **kwargs): super().__init__() self.hidden_size = hidden_size #self.dtype = pufferlib.pytorch.nativize_dtype(env.emulated) @@ -88,8 +88,8 
@@ def __init__(self, env, hidden_size=512, **kwargs): self.layer_norm = nn.LayerNorm(hidden_size) self.actor = pufferlib.pytorch.layer_init( - nn.Linear(hidden_size, self.num_actions), std=0.01) - self.value_fn = pufferlib.pytorch.layer_init(nn.Linear(hidden_size, 1), std=1) + nn.Linear(output_size, self.num_actions), std=0.01) + self.value_fn = pufferlib.pytorch.layer_init(nn.Linear(output_size, 1), std=1) def forward(self, x, state=None): hidden = self.encode_observations(x) @@ -526,8 +526,8 @@ def decode_actions(self, flat_hidden): return action, value class TowerClimbLSTM(pufferlib.models.LSTMWrapper): - def __init__(self, env, policy, hidden_size = 256): - super().__init__(env, policy, hidden_size) + def __init__(self, env, policy, input_size = 256, hidden_size = 256): + super().__init__(env, policy, input_size, hidden_size) class TowerClimb(nn.Module): def __init__(self, env, cnn_channels=16, hidden_size = 256, **kwargs): @@ -582,7 +582,7 @@ def decode_actions(self, flat_hidden): class ImpulseWarsLSTM(Recurrent): - def __init__(self, env, policy, hidden_size=512, **kwargs): + def __init__(self, env, policy, hidden_size: int = 512, **kwargs): super().__init__(env, policy, hidden_size) @@ -597,7 +597,6 @@ def __init__( num_drones: int = 2, continuous: bool = False, is_training: bool = True, - device: str = "cuda", **kwargs, ): super().__init__() @@ -615,13 +614,13 @@ def __init__( + [self.obsInfo.wallTypes + 1] * self.obsInfo.numFloatingWallObs + [self.numDrones + 1] * self.obsInfo.numProjectileObs, ) - discreteOffsets = torch.tensor([0] + list(np.cumsum(self.discreteFactors)[:-1]), device=device).view( + discreteOffsets = torch.tensor([0] + list(np.cumsum(self.discreteFactors)[:-1])).view( 1, -1 ) self.register_buffer("discreteOffsets", discreteOffsets, persistent=False) self.discreteMultihotDim = self.discreteFactors.sum() - multihotBuffer = torch.zeros(batch_size, self.discreteMultihotDim, device=device) + multihotBuffer = torch.zeros(batch_size, 
self.discreteMultihotDim) self.register_buffer("multihotOutput", multihotBuffer, persistent=False) # most of the observation is a 2D array of bytes, but the end From c32430f3aec08a884139d82021c69f744bc6c601 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 18:12:58 +0000 Subject: [PATCH 049/188] initial box plot --- pufferlib/ocean/plot/plot.c | 241 ++++++++++++++++++++++++++++++------ 1 file changed, 200 insertions(+), 41 deletions(-) diff --git a/pufferlib/ocean/plot/plot.c b/pufferlib/ocean/plot/plot.c index b41aa911e..bcf6c9eca 100644 --- a/pufferlib/ocean/plot/plot.c +++ b/pufferlib/ocean/plot/plot.c @@ -22,6 +22,12 @@ const float EMPTY = -4242.0f; #define TOGGLE_WIDTH 60 #define DROPDOWN_WIDTH 200 +typedef struct { + char *key; + float *values; + int size; +} KeyValue; + typedef struct PlotArgs { float x_min; float x_max; @@ -37,7 +43,8 @@ typedef struct PlotArgs { int legend_font_size; int line_width; int tick_length; - int margin; + int x_margin; + int y_margin; Color font_color; Color background_color; Color axis_color; @@ -62,7 +69,8 @@ PlotArgs DEFAULT_PLOT_ARGS = { .legend_font_size = 12, .line_width = 2, .tick_length = 8, - .margin = 70, + .x_margin = 70, + .y_margin = 70, .font_color = PUFF_WHITE, .background_color = PUFF_BACKGROUND, .axis_color = PUFF_WHITE, @@ -98,10 +106,10 @@ void draw_axes(PlotArgs args) { int height = args.height; // Draw axes - DrawLine(args.margin, args.margin, - args.margin, height - args.margin, PUFF_WHITE); - DrawLine(args.margin, height - args.margin, - width - args.margin, height - args.margin, PUFF_WHITE); + DrawLine(args.x_margin, args.y_margin, + args.x_margin, height - args.y_margin, PUFF_WHITE); + DrawLine(args.x_margin, height - args.y_margin, + width - args.x_margin, height - args.y_margin, PUFF_WHITE); // X label Vector2 x_font_size = MeasureTextEx(args.font, args.x_label, args.axis_font_size, 0); @@ -131,19 +139,19 @@ void draw_axes(PlotArgs args) { // Autofit number of ticks Vector2 
tick_label_size = MeasureTextEx(args.font, "estimate", args.axis_font_size, 0); - int num_x_ticks = (width - 2*args.margin)/tick_label_size.x; - int num_y_ticks = (height - 2*args.margin)/tick_label_size.x; + int num_x_ticks = (width - 2*args.x_margin)/tick_label_size.x; + int num_y_ticks = (height - 2*args.y_margin)/tick_label_size.x; // X ticks for (int i=0; ix_min = 1e-8; + args->x_max = 1e8; + + for (int i=0; ix_min = fmin(args->x_min, values[j]); + //args->x_max = fmax(args->x_max, values[j]); + } + } +} + + + int main(void) { FILE *file = fopen("pufferlib/ocean/plot/data.json", "r"); if (!file) { @@ -360,10 +508,14 @@ int main(void) { } // Load all keys and their float arrays + int hyper_count = 0; int idx = 0; item = root->child; while (item) { map[idx].key = strdup(item->string); + if (strncmp(map[idx].key, "train", 5) == 0) { + hyper_count++; + } if (!map[idx].key) { printf("Memory allocation error for key\n"); return cleanup(map, map_count, root, json_str); @@ -399,9 +551,9 @@ int main(void) { } // Create items as an array of strings - if (map_count > 20) { - map_count = 20; - } + //if (map_count > 100) { + // map_count = 100; + //} char **items = malloc(map_count * sizeof(char *)); if (!items) { printf("Memory allocation error\n"); @@ -428,6 +580,17 @@ int main(void) { strcat(options, map[i].key); } + // Hypers + + hyper_count = 5; + char *hyper_key[5] = {"train/learning_rate", "train/gamma", "train/gae_lambda", "train/ent_coef", "train/vf_coef"}; + KeyValue hypers[5]; + for (int i=0; i<5; i++) { + hypers[i].key = hyper_key[i]; + hypers[i].values = get_values(map, map_count, hyper_key[i], &hypers[i].size); + } + float *box_mmin = malloc(hyper_count * sizeof(float)); + float *box_mmax = malloc(hyper_count * sizeof(float)); // Example usage: Print the arrays // Cleanup @@ -477,10 +640,12 @@ int main(void) { int fig2_y_idx = 0; PlotArgs args3 = DEFAULT_PLOT_ARGS; + args3.x_margin = 250; args3.font = GetFontDefault(); RenderTexture2D fig3 = 
LoadRenderTexture(args3.width, args3.height); bool fig3_x_active = false; int fig3_x_idx = 3; + bool fig3_x_log = true; bool fig3_y_active = false; int fig3_y_idx = 0; @@ -587,20 +752,14 @@ int main(void) { fig2_y_active = !fig2_y_active; } - x_label = items[fig3_x_idx]; - y_label = items[fig3_y_idx]; - args3.x_label = x_label; - args3.y_label = y_label; - x = get_values(map, map_count, x_label, &num_points); - y = get_values(map, map_count, y_label, &num_points); - args3.x_min = ary_min(x, num_points); - args3.x_max = ary_max(x, num_points); - args3.y_min = ary_min(y, num_points); - args3.y_max = ary_max(y, num_points); + compute_boxplot_data(hypers, box_mmin, box_mmax, hyper_count, &args3); + args3.x_label = "Value"; + args3.y_label = "Hyperparameter"; BeginTextureMode(fig3); ClearBackground(PUFF_BACKGROUND); - plot(x, y, num_points, args3); - draw_axes(args3); + boxplot(box_mmin, box_mmax, fig3_x_log, hyper_count, args3); + //draw_axes(args3); + draw_box_axes(hypers, hyper_count, args3); EndTextureMode(); DrawTextureRec( fig3.texture, From 492ec0800dec579ee752e0b8ee2acf278c82a5f9 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 9 Oct 2025 21:48:50 +0000 Subject: [PATCH 050/188] To pandas --- constellation.py | 211 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 172 insertions(+), 39 deletions(-) diff --git a/constellation.py b/constellation.py index 8177d59ec..68c09a744 100644 --- a/constellation.py +++ b/constellation.py @@ -19,7 +19,8 @@ PAPER_BG_COLOR = '#061a1a' LINE_WIDTH = 4 LINE_COLORS = ["#0000b3", "#0010d9", "#0020ff", "#0040ff", "#0060ff", "#0080ff", "#009fff", "#00bfff", "#00ffff"][::-1] -roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet', 'white', 'gray'] +roygbiv = np.random.permutation(['aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 
'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgrey', 'darkgreen', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'grey', 'green', 'greenyellow', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow', 'lightgray', 'lightgrey', 'lightgreen', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray', 'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine', 'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise', 'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive', 'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip', 'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'red', 'rosybrown', 'royalblue', 'saddlebrown', 'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey', 'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white', 'whitesmoke', 'yellow', 'yellowgreen']) +#roygbiv = ['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'] TITLE_FONT = dict( family=FONT_FAMILY, size=FONT_SIZE_TITLE, @@ -125,7 +126,7 @@ def load_sweep_data(path): return data -def cached_sweep_load(path): +def cached_sweep_load(path, env_name): cache_file = os.path.join(path, 'cache.json') if 
not os.path.exists(cache_file): data = load_sweep_data(os.path.join(path, '*.json')) @@ -140,22 +141,35 @@ def cached_sweep_load(path): scores = data['environment/score'] idxs = pareto_idx(steps, costs, scores) + + # Create a DataFrame for this environment + df_data = {} for k in data: - data[k] = [data[k][i] for i in idxs] - - data['environment/perf'] = [min(e, 1.0) for e in data['environment/perf']] - - if 'env/frameskip' in data: - skip = data['env/frameskip'] - data['agent_steps'] = [n*m for n, m in zip(data['agent_steps'], skip)] - - return data - -env_names = ['grid', 'moba', 'tower_climb', 'tetris', 'breakout', 'pong', 'g2048', 'snake', 'pacman'] -EXPERIMENTS = { - name: cached_sweep_load(f'experiments/logs/puffer_{name}') - for name in env_names -} + df_data[k] = [data[k][i] for i in idxs] + + # Apply performance cap + df_data['environment/perf'] = [min(e, 1.0) for e in df_data['environment/perf']] + + # Adjust steps by frameskip if present + if 'env/frameskip' in df_data: + skip = df_data['env/frameskip'] + df_data['agent_steps'] = [n*m for n, m in zip(df_data['agent_steps'], skip)] + + # Add environment name + df_data['env_name'] = [env_name] * len(idxs) + + return pd.DataFrame(df_data) + +env_names = ['tripletriad', 'grid', 'moba', 'tower_climb', 'tetris', 'breakout', 'pong', 'g2048', 'snake', 'pacman'] +#env_names = ['grid', 'breakout', 'g2048'] +#env_names = ['grid'] + +# Create a list of DataFrames for each environment +dfs = [cached_sweep_load(f'experiments/logs/puffer_{name}', name) for name in env_names] + +# Concatenate all DataFrames into a single DataFrame +EXPERIMENTS = pd.concat(dfs, ignore_index=True) +EXPERIMENTS.set_index('env_name', inplace=True) app = Dash() app.css.append_css({'external_stylesheets': 'dash.css'}) @@ -266,6 +280,21 @@ def cached_sweep_load(path): ]), dcc.Graph(id='hyper-agg'), + + html.Br(), + html.Label([ + "Threshold: ", + dcc.Slider( + id='pca-slider', + min=0.0, + max=1.0, + step=0.05, + value=0.5, + marks={i: 
str(0.05*i) for i in range(0, 21)} + ) + ]), + dcc.Graph(id='pca'), + ], style={"width": 1280} ) @@ -281,11 +310,16 @@ def update_optimal_plot(xkey, ykey, zkey): all_y = [] all_z = [] all_env = [] - for env in EXPERIMENTS: - all_x += EXPERIMENTS[env][xkey] - all_y += EXPERIMENTS[env][ykey] - all_z += EXPERIMENTS[env][zkey] - all_env += [env] * len(EXPERIMENTS[env][xkey]) + for env in env_names: + env_data = EXPERIMENTS.loc[env] + all_x.append(env_data[xkey].copy()) + all_y.append(env_data[ykey].copy()) + all_z.append(env_data[zkey].copy()) + all_env += [env] * len(env_data[xkey]) + + all_x = np.concatenate(all_x) + all_y = np.concatenate(all_y) + all_z = np.concatenate(all_z) f = px.scatter_3d(x=all_x, y=all_y, z=all_z, color=all_env, log_x=True, log_y=True, log_z=False, color_discrete_sequence=roygbiv) layout_dict = { 'title': dict(text='Pareto', font=TITLE_FONT), @@ -339,12 +373,13 @@ def update_optimal_plot(xkey, ykey, zkey): Input("scatter-dropdown-z", "value") ) def update_scatter(env, xkey, ykey, zkey): - x = EXPERIMENTS[env][xkey] - y = EXPERIMENTS[env][ykey] - z = EXPERIMENTS[env][zkey] + env_data = EXPERIMENTS.loc[env] + x = env_data[xkey] + y = env_data[ykey] + z = env_data[zkey] mmin = min(z) mmax = max(z) - thresh = np.geomspace(mmin, mmax, 8) + thresh = np.linspace(mmin, mmax, 8) all_fx = [] all_fy = [] bin_label = [] @@ -356,12 +391,12 @@ def update_scatter(env, xkey, ykey, zkey): fy = [y[i] for i in idxs] all_fx += fx all_fy += fy - bin_label += [str(j)] * len(fx) + bin_label += [str(thresh[j])] * len(fx) f = px.scatter(x=all_fx, y=all_fy, color=bin_label, color_discrete_sequence=roygbiv) f.update_traces(marker_size=10) layout_dict = { 'title': dict(text='Experiments', font=TITLE_FONT), - 'showlegend': False, + 'showlegend': True, 'legend': dict(font=LEGEND_FONT), 'plot_bgcolor': PLOT_BG_COLOR, 'paper_bgcolor': PAPER_BG_COLOR, @@ -387,17 +422,18 @@ def update_scatter(env, xkey, ykey, zkey): Input("hyper-box-x", "value") ) def update_hyper_box(x): - 
buckets = 8 + buckets = 4 env_data = {} - for env in EXPERIMENTS: - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] + for env in env_names: + data = EXPERIMENTS.loc[env] + steps = data['agent_steps'] + costs = data['cost'] + scores = data['environment/score'] x_data = costs if x == 'cost' else steps hyper_data = {} env_data[env] = {'x': x_data, 'hypers': hyper_data} for h in HYPERS: - hyper_data[h] = EXPERIMENTS[env][h] + hyper_data[h] = data[h] all_x = [x for env in env_data for x in env_data[env]['x']] x_min, x_max = min(all_x), max(all_x) bucket_edges = np.linspace(x_min, x_max, buckets + 1) @@ -416,6 +452,7 @@ def update_hyper_box(x): bucket_means.append(np.mean(hyper_vals[idxs])) heatmap_data[i, j] = np.mean(bucket_means) if bucket_means else np.nan heatmap_data = np.log(heatmap_data) + heatmap_data -= heatmap_data[:, 0, None] # Normalize f = px.imshow(heatmap_data, x=bucket_centers, y=HYPERS, color_continuous_scale='Viridis', zmin=np.nanmin(heatmap_data), zmax=np.nanmax(heatmap_data), labels=dict(color="Value")) layout_dict = { 'title': dict(text="Hyperparameter Drift", font=TITLE_FONT), @@ -465,10 +502,11 @@ def update_hyper_agg_plot(thresh, step_range): f.update_xaxes(showgrid=False) f.update_yaxes(showgrid=False) - for i, env in enumerate(EXPERIMENTS): - steps = EXPERIMENTS[env]['agent_steps'] - costs = EXPERIMENTS[env]['cost'] - scores = EXPERIMENTS[env]['environment/score'] + for i, env in enumerate(env_names): + env_data = EXPERIMENTS.loc[env] + steps = env_data['agent_steps'] + costs = env_data['cost'] + scores = env_data['environment/score'] max_score = max(scores) max_steps = max(steps) @@ -480,7 +518,7 @@ def update_hyper_agg_plot(thresh, step_range): continue for k, hyper in enumerate(HYPERS): - y = [EXPERIMENTS[env][hyper][i] for i in idxs] + y = [env_data[hyper][i] for i in idxs] ymin = min(y) ymax = max(y) @@ -499,5 +537,100 @@ def update_hyper_agg_plot(thresh, 
step_range): return f +@app.callback( + Output("pca", "figure"), + Input("pca-slider", "value"), +) +def update_pca_plot(thresh): + # Initialize figure + f = go.Figure() + f.update_layout( + title=dict(text='Hyperparameter Stable Range', font=TITLE_FONT), + xaxis=dict(title=dict(text='Value', font=AXIS_FONT), tickfont=TICK_FONT), + yaxis=dict(title=dict(text='Hyper', font=AXIS_FONT), tickfont=TICK_FONT), + showlegend=True, + legend=dict(font=LEGEND_FONT), + plot_bgcolor=PLOT_BG_COLOR, + paper_bgcolor=PAPER_BG_COLOR, + width=1280, + height=720, + autosize=False, + xaxis_type='log', + barmode='overlay', # Overlay bars instead of stacking + ) + f.update_xaxes(showgrid=False) + f.update_yaxes(showgrid=False) + + filtered = {env: [] for env in env_names} + for env in env_names: + env_data = EXPERIMENTS.loc[env] + perf = env_data['environment/perf'] + idxs = [i for i in range(len(perf)) if perf[i] > thresh] + for hyper in HYPERS: + filt = np.array([env_data[hyper][i] for i in idxs]) + mmin = np.array(env_data[f'sweep/{hyper}/min']) + mmin = [mmin[i] for i in idxs] + mmax = env_data[f'sweep/{hyper}/max'] + mmax = np.array([mmax[i] for i in idxs]) + distribution = env_data[f'sweep/{hyper}/distribution'][0] + #if 'uniform' in distribution: + # #filt = (filt - mmin) / (mmax - mmin) + # pass + if 'log' in distribution or 'pow2' in distribution: + filt = np.log(filt) + #filt = (np.log(filt) - np.log(mmin)) / (np.log(mmax) - np.log(mmin)) + + filtered[env].append(filt) + + filtered[env] = np.array(filtered[env]).T + + training = np.concatenate(list(filtered.values()), axis=0) + from sklearn.decomposition import PCA + pca = PCA(n_components=2) + pca.fit(training) + + all_x = [] + all_y = [] + all_z = [] + for i, env in enumerate(filtered): + if filtered[env].shape[0] == 0: + continue + + reduced = pca.transform(filtered[env]) + x, y = reduced[:, 0], reduced[:, 1] + all_x.append(x) + all_y.append(y) + all_z.append([env]*len(x)) + + all_x = np.concatenate(all_x) + all_y = 
np.concatenate(all_y) + all_z = np.concatenate(all_z) + f = px.scatter(x=all_x, y=all_y, color=all_z, color_discrete_sequence=roygbiv) + + f.update_traces(marker_size=10) + layout_dict = { + 'title': dict(text='Experiments', font=TITLE_FONT), + 'showlegend': True, + 'legend': dict(font=LEGEND_FONT), + 'plot_bgcolor': PLOT_BG_COLOR, + 'paper_bgcolor': PAPER_BG_COLOR, + 'width': 1280, + 'height': 720, + 'autosize': False, + 'xaxis': dict( + title=dict(text='principal component 1', font=AXIS_FONT), + tickfont=TICK_FONT, + showgrid=False + ), + 'yaxis': dict( + title=dict(text='principal component 2', font=AXIS_FONT), + tickfont=TICK_FONT, + showgrid=False + ) + } + f.update_layout(**layout_dict) + return f + + if __name__ == '__main__': app.run(host='0.0.0.0', port=8000) From 80c57121c2a98086df476e098287a9d7c0a8403a Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 10 Oct 2025 21:09:17 +0000 Subject: [PATCH 051/188] progress --- constellation.py | 520 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 387 insertions(+), 133 deletions(-) diff --git a/constellation.py b/constellation.py index 68c09a744..a0bed137f 100644 --- a/constellation.py +++ b/constellation.py @@ -76,6 +76,38 @@ 'environment/perf' ] + HYPERS +SCATTER_COLOR = ['env_name'] + ALL_KEYS + +import colorsys +import numpy as np + +def rgb_to_hex(rgb): + """Convert RGB tuple to hex string.""" + return '#%02x%02x%02x' % (int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255)) + +def generate_distinct_palette(n): + """ + Generate a palette with n maximally distinct colors across the hue spectrum. + + Parameters: + n (int): Number of colors to generate. + + Returns: + list: List of hex color strings. 
+ """ + if n < 1: + raise ValueError("n must be at least 1") + + # Generate hues evenly spaced across the spectrum (0 to 1) + hues = np.linspace(0, 1, n, endpoint=False) + colors = [] + for hue in hues: + # Use full saturation and value for vivid colors + rgb = colorsys.hsv_to_rgb(hue, 1.0, 1.0) + colors.append(rgb) + hex_colors = [rgb_to_hex(color) for color in colors] + return hex_colors + def pareto_idx(steps, costs, scores): idxs = [] for i in range(len(steps)): @@ -87,6 +119,16 @@ def pareto_idx(steps, costs, scores): return idxs +def build_dataset(dataframe): + dataset = [] + for hyper in HYPERS: + dat = dataframe[hyper] + #mmin = dataframe[f'sweep/{hyper}/min'] + #mmax = dataframe[f'sweep/{hyper}/max'] + #distribution = dataframe[f'sweep/{hyper}/distribution'] + + + def load_sweep_data(path): data = {} keys = None @@ -160,16 +202,52 @@ def cached_sweep_load(path, env_name): return pd.DataFrame(df_data) +def compute_tsne(): + dataset = EXPERIMENTS[HYPERS].copy() # Create a copy to avoid modifying the original + + # Normalize each hyperparameter column using its corresponding min and max columns + for hyper in HYPERS: + min_col = f'sweep/{hyper}/min' + max_col = f'sweep/{hyper}/max' + + mmin = min(EXPERIMENTS[min_col]) + mmax = max(EXPERIMENTS[max_col]) + + distribution = EXPERIMENTS[f'sweep/{hyper}/distribution'] + if 'log' in distribution or 'pow2' in distribution: + mmin = np.log(mmin) + mmax = np.log(mmax) + normed = np.log(dataset[hyper]) + else: + normed = dataset[hyper] + + dataset[hyper] = (normed - mmin) / (mmax - mmin) + # Normalize: (value - min) / (max - min) for each row + + #dataset[hyper] = (dataset[hyper] - EXPERIMENTS[min_col]) / (EXPERIMENTS[max_col] - EXPERIMENTS[min_col]) + + # Filter dataset based on performance threshold + # Apply TSNE + from sklearn.manifold import TSNE + proj = TSNE(n_components=2) + reduced = proj.fit_transform(dataset) + EXPERIMENTS['tsne1'] = reduced[:, 0] + EXPERIMENTS['tsne2'] = reduced[:, 1] + env_names = 
['tripletriad', 'grid', 'moba', 'tower_climb', 'tetris', 'breakout', 'pong', 'g2048', 'snake', 'pacman'] +env_all = ['all'] + env_names #env_names = ['grid', 'breakout', 'g2048'] #env_names = ['grid'] +roygbiv = generate_distinct_palette(len(env_names)) + # Create a list of DataFrames for each environment dfs = [cached_sweep_load(f'experiments/logs/puffer_{name}', name) for name in env_names] # Concatenate all DataFrames into a single DataFrame EXPERIMENTS = pd.concat(dfs, ignore_index=True) -EXPERIMENTS.set_index('env_name', inplace=True) +#EXPERIMENTS.set_index('env_name', inplace=True) +compute_tsne() app = Dash() app.css.append_css({'external_stylesheets': 'dash.css'}) @@ -211,94 +289,189 @@ def cached_sweep_load(path, env_name): "Environment: ", dcc.Dropdown( id="scatter-dropdown-env", - options=[{"label": key, "value": key} for key in env_names], - value="breakout", + options=[{"label": key, "value": key} for key in env_all], + value="all", style={"width": "50%"} ) ]), + html.Br(), html.Label([ "X: ", dcc.Dropdown( id="scatter-dropdown-x", options=[{"label": key, "value": key} for key in ALL_KEYS], - value="cost", + value="train/learning_rate", style={"width": "50%"} - ) + ), + dcc.Checklist( + id="scatter-checkbox-logx", + options=[{"label": "Log", "value": "log"}], + value=["log"], + style={"display": "inline-block", "margin-left": "10px"} + ), ]), + html.Br(), html.Label([ "Y: ", dcc.Dropdown( id="scatter-dropdown-y", options=[{"label": key, "value": key} for key in ALL_KEYS], - value="environment/score", + value="environment/perf", + style={"width": "50%"} + ), + dcc.Checklist( + id="scatter-checkbox-logy", + options=[{"label": "Log", "value": "log"}], + value=[], + style={"display": "inline-block", "margin-left": "10px"} + ), + + ]), + html.Br(), + html.Label([ + "Color: ", + dcc.Dropdown( + id="scatter-dropdown-color", + options=[{"label": key, "value": key} for key in SCATTER_COLOR], + value="env_name", style={"width": "50%"} ) ]), + html.Br(), 
html.Label([ - "Z: ", + "Range 1: ", dcc.Dropdown( - id="scatter-dropdown-z", + id="scatter-dropdown-range-1", options=[{"label": key, "value": key} for key in ALL_KEYS], value="agent_steps", style={"width": "50%"} - ) + ), + dcc.RangeSlider( + id='scatter-range-1', + min=0.0, + max=1.0, + step=0.05, + value=[0.0, 0.25] + ), ]), - dcc.Graph(id='scatter'), html.Br(), - html.Label([ - "X Axis: ", + "Range 2: ", dcc.Dropdown( - id="hyper-box-x", - options=[{"label": key, "value": key} for key in ['cost', 'agent_steps']], - value="agent_steps", + id="scatter-dropdown-range-2", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="cost", style={"width": "50%"} - ) + ), + dcc.RangeSlider( + id='scatter-range-2', + min=0.0, + max=1.0, + step=0.05, + value=[0.0, 0.95] + ), ]), - dcc.Graph(id='hyper-box'), + dcc.Graph(id='scatter'), + html.Br(), + + #html.Label([ + # "X Axis: ", + # dcc.Dropdown( + # id="hyper-box-x", + # options=[{"label": key, "value": key} for key in ['cost', 'agent_steps']], + # value="agent_steps", + # style={"width": "50%"} + # ) + #]), + #dcc.Graph(id='hyper-box'), + html.Br(), html.Label([ - "Score Threshold %: ", - dcc.Slider( - id='hyper-agg-slider', + "Range 1: ", + dcc.Dropdown( + id="hyper-dropdown-range-1", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="environment/perf", + style={"width": "50%"} + ), + dcc.RangeSlider( + id='hyper-range-1', min=0.0, max=1.0, step=0.05, - value=0.95, - marks={i: str(0.05*i) for i in range(0, 21)} - ) + value=[0.8, 1.0] + ), ]), + html.Br(), html.Label([ - "Steps Interval: ", + "Range 2: ", + dcc.Dropdown( + id="hyper-dropdown-range-2", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="agent_steps", + style={"width": "50%"} + ), dcc.RangeSlider( - id='hyper-agg-range', + id='hyper-range-2', min=0.0, max=1.0, - step=0.1, + step=0.05, value=[0.0, 1.0] - ) + ), ]), - dcc.Graph(id='hyper-agg'), + dcc.Graph(id='hyper'), html.Br(), html.Label([ - 
"Threshold: ", - dcc.Slider( - id='pca-slider', + "Range 1: ", + dcc.Dropdown( + id="tsnee-dropdown-range-1", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="environment/perf", + style={"width": "50%"} + ), + dcc.RangeSlider( + id='tsnee-range-1', + min=0.0, + max=1.0, + step=0.05, + value=[0.5, 1.0] + ), + ]), + html.Br(), + html.Label([ + "Range 2: ", + dcc.Dropdown( + id="tsnee-dropdown-range-2", + options=[{"label": key, "value": key} for key in ALL_KEYS], + value="cost", + style={"width": "50%"} + ), + dcc.RangeSlider( + id='tsnee-range-2', min=0.0, max=1.0, step=0.05, - value=0.5, - marks={i: str(0.05*i) for i in range(0, 21)} - ) + value=[0.0, 1.0] + ), ]), - dcc.Graph(id='pca'), + dcc.Graph(id='tsnee'), ], style={"width": 1280} ) +import plotly.express as px +import plotly.graph_objects as go +import numpy as np +from scipy.spatial.distance import cdist + +# Assuming EXPERIMENTS is your pandas DataFrame, and xkey, ykey, zkey are defined. +# Also assuming percentages for cutoffs, e.g.: +percentage1 = 5.0 # Percentage for XYZ distance threshold relative to plot diagonal in transformed space +percentage2 = 0.5 # Percentage for PCA distance threshold relative to PCA diagonal + @app.callback( Output("optimal", "figure"), Input("optimal-dropdown-x", "value"), @@ -306,21 +479,84 @@ def cached_sweep_load(path, env_name): Input("optimal-dropdown-z", "value") ) def update_optimal_plot(xkey, ykey, zkey): - all_x = [] - all_y = [] - all_z = [] - all_env = [] - for env in env_names: - env_data = EXPERIMENTS.loc[env] - all_x.append(env_data[xkey].copy()) - all_y.append(env_data[ykey].copy()) - all_z.append(env_data[zkey].copy()) - all_env += [env] * len(env_data[xkey]) - - all_x = np.concatenate(all_x) - all_y = np.concatenate(all_y) - all_z = np.concatenate(all_z) - f = px.scatter_3d(x=all_x, y=all_y, z=all_z, color=all_env, log_x=True, log_y=True, log_z=False, color_discrete_sequence=roygbiv) + all_x = EXPERIMENTS[xkey].values + all_y = 
EXPERIMENTS[ykey].values + all_z = EXPERIMENTS[zkey].values + all_pca1 = EXPERIMENTS['tsne1'].values + all_pca2 = EXPERIMENTS['tsne2'].values + all_env = EXPERIMENTS['env_name'].values# Handle transformed coordinates for XYZ (accounting for log axes) + trans_x = np.log10(all_x) # Assuming all_x > 0 + trans_y = np.log10(all_y) # Assuming all_y > 0 + trans_z = all_z + points_trans_xyz = np.column_stack((trans_x, trans_y, trans_z)) + + # Compute ranges in transformed space + range_tx = np.max(trans_x) - np.min(trans_x) + range_ty = np.max(trans_y) - np.min(trans_y) + range_tz = np.max(trans_z) - np.min(trans_z) + diagonal_xyz = np.sqrt(range_tx**2 + range_ty**2 + range_tz**2) + delta1 = (percentage1 / 100.0) * diagonal_xyz + + # For PCA (assuming linear scales) + points_pca = np.column_stack((all_pca1, all_pca2)) + range_p1 = np.max(all_pca1) - np.min(all_pca1) + range_p2 = np.max(all_pca2) - np.min(all_pca2) + diagonal_pca = np.sqrt(range_p1**2 + range_p2**2) + delta2 = (percentage2 / 100.0) * diagonal_pca + + # Create the base scatter plot + f = px.scatter_3d( + x=all_x, + y=all_y, + z=all_z, + color=all_env, + log_x=True, + log_y=True, + log_z=False, + color_discrete_sequence=roygbiv + ) + + # Compute pairwise L2 distances in transformed spaces + dists_xyz = cdist(points_trans_xyz, points_trans_xyz) + dists_pca = cdist(points_pca, points_pca) + + # Create boolean masks + xyz_mask = dists_xyz < delta1 + pca_mask = dists_pca < delta2 + # Use boolean array for upper triangle to avoid type mismatch + triu_mask = np.triu(np.ones_like(dists_xyz, dtype=bool), k=1) + + # Combine masks with boolean operations + mask = xyz_mask & pca_mask & triu_mask + + # Get indices of valid pairs + i, j = np.where(mask) + + # Collect line segment coordinates (in original space) + line_x = [] + line_y = [] + line_z = [] + for k in range(len(i)): + line_x.extend([all_x[i[k]], all_x[j[k]], None]) + line_y.extend([all_y[i[k]], all_y[j[k]], None]) + line_z.extend([all_z[i[k]], all_z[j[k]], 
None]) + + # Add the lines as a single trace + if line_x: + f.add_trace( + go.Scatter3d( + x=line_x, + y=line_y, + z=line_z, + mode='lines', + line=dict(color='rgba(255,255,255,0.25)', width=2), + showlegend=False + ) + ) + + # Show the figure + f.show() + layout_dict = { 'title': dict(text='Pareto', font=TITLE_FONT), 'showlegend': True, @@ -369,30 +605,60 @@ def update_optimal_plot(xkey, ykey, zkey): Output("scatter", "figure"), Input("scatter-dropdown-env", "value"), Input("scatter-dropdown-x", "value"), + Input("scatter-checkbox-logx", "value"), Input("scatter-dropdown-y", "value"), - Input("scatter-dropdown-z", "value") + Input("scatter-checkbox-logy", "value"), + Input("scatter-dropdown-color", "value"), + Input("scatter-dropdown-range-1", "value"), + Input("scatter-range-1", "value"), + Input("scatter-dropdown-range-2", "value"), + Input("scatter-range-2", "value"), ) -def update_scatter(env, xkey, ykey, zkey): - env_data = EXPERIMENTS.loc[env] +def update_scatter(env, xkey, logx, ykey, logy, zkey, range1_key, range1, range2_key, range2): + #env_data = EXPERIMENTS.loc[env] + if env == 'all': + env_data = EXPERIMENTS + else: + env_data = EXPERIMENTS[EXPERIMENTS['env_name'] == env] + + range1_mmin = min(EXPERIMENTS[range1_key]) + range1_mmax = max(EXPERIMENTS[range1_key]) + norm_range1 = (EXPERIMENTS[range1_key] - range1_mmin) / (range1_mmax - range1_mmin) + + range2_mmin = min(EXPERIMENTS[range2_key]) + range2_mmax = max(EXPERIMENTS[range2_key]) + norm_range2 = (EXPERIMENTS[range2_key] - range2_mmin) / (range2_mmax - range2_mmin) + + mask = (norm_range1 >= range1[0]) & (norm_range1 <= range1[1]) & (norm_range2 >= range2[0]) & (norm_range2 <= range2[1]) + + env_data = env_data[mask] + x = env_data[xkey] y = env_data[ykey] z = env_data[zkey] - mmin = min(z) - mmax = max(z) - thresh = np.linspace(mmin, mmax, 8) - all_fx = [] - all_fy = [] - bin_label = [] - for j in range(7): - idxs = [i for i, e in enumerate(z) if thresh[j] < e < thresh[j+1]] - if len(idxs) <= 
2: - continue - fx = [x[i] for i in idxs] - fy = [y[i] for i in idxs] - all_fx += fx - all_fy += fy - bin_label += [str(thresh[j])] * len(fx) - f = px.scatter(x=all_fx, y=all_fy, color=bin_label, color_discrete_sequence=roygbiv) + + if zkey == 'env_name': + f = px.scatter(x=x, y=y, color=z, color_discrete_sequence=roygbiv) + else: + mmin = min(z) + mmax = max(z) + thresh = np.geomspace(mmin, mmax, 8) + all_fx = [] + all_fy = [] + bin_label = [] + for j in range(7): + filter = (thresh[j] < z) & (z < thresh[j+1]) + if filter.sum() <= 2: + continue + + fx = x[filter] + fy = y[filter] + all_fx += fx.tolist() + all_fy += fy.tolist() + bin_label += [str(thresh[j])] * len(fx) + + f = px.scatter(x=all_fx, y=all_fy, color=bin_label, color_discrete_sequence=roygbiv) + f.update_traces(marker_size=10) layout_dict = { 'title': dict(text='Experiments', font=TITLE_FONT), @@ -406,12 +672,14 @@ def update_scatter(env, xkey, ykey, zkey): 'xaxis': dict( title=dict(text=xkey, font=AXIS_FONT), tickfont=TICK_FONT, - showgrid=False + showgrid=False, + type='log' if 'log' in logx else 'linear', ), 'yaxis': dict( title=dict(text=ykey, font=AXIS_FONT), tickfont=TICK_FONT, - showgrid=False + showgrid=False, + type='log' if 'log' in logy else 'linear', ) } f.update_layout(**layout_dict) @@ -425,7 +693,8 @@ def update_hyper_box(x): buckets = 4 env_data = {} for env in env_names: - data = EXPERIMENTS.loc[env] + #data = EXPERIMENTS.loc[env] + data = EXPERIMENTS[EXPERIMENTS['env_name'] == env] steps = data['agent_steps'] costs = data['cost'] scores = data['environment/score'] @@ -478,11 +747,13 @@ def update_hyper_box(x): return f @app.callback( - Output("hyper-agg", "figure"), - Input("hyper-agg-slider", "value"), - Input("hyper-agg-range", "value") + Output("hyper", "figure"), + Input("hyper-dropdown-range-1", "value"), + Input("hyper-range-1", "value"), + Input("hyper-dropdown-range-2", "value"), + Input("hyper-range-2", "value"), ) -def update_hyper_agg_plot(thresh, step_range): +def 
update_hyper_plot(xkey, range1, ykey, range2): # Initialize figure f = go.Figure() f.update_layout( @@ -502,8 +773,21 @@ def update_hyper_agg_plot(thresh, step_range): f.update_xaxes(showgrid=False) f.update_yaxes(showgrid=False) + range1_mmin = min(EXPERIMENTS[xkey]) + range1_mmax = max(EXPERIMENTS[xkey]) + norm_x = (EXPERIMENTS[xkey] - range1_mmin) / (range1_mmax - range1_mmin) + range2_mmin = min(EXPERIMENTS[ykey]) + range2_mmax = max(EXPERIMENTS[ykey]) + norm_y = (EXPERIMENTS[ykey] - range2_mmin) / (range2_mmax - range2_mmin) + mask = (norm_x >= range1[0]) & (norm_x <= range1[1]) & (norm_y >= range2[0]) & (norm_y <= range2[1]) + filtered = EXPERIMENTS[mask] + for i, env in enumerate(env_names): - env_data = EXPERIMENTS.loc[env] + #env_data = EXPERIMENTS.loc[env] + env_data = filtered[filtered['env_name'] == env] + if len(env_data) < 2: + continue + steps = env_data['agent_steps'] costs = env_data['cost'] scores = env_data['environment/score'] @@ -511,14 +795,10 @@ def update_hyper_agg_plot(thresh, step_range): max_score = max(scores) max_steps = max(steps) n = len(scores) - idxs = [i for i in range(n) if scores[i] > thresh*max_score and - step_range[0] thresh] - for hyper in HYPERS: - filt = np.array([env_data[hyper][i] for i in idxs]) - mmin = np.array(env_data[f'sweep/{hyper}/min']) - mmin = [mmin[i] for i in idxs] - mmax = env_data[f'sweep/{hyper}/max'] - mmax = np.array([mmax[i] for i in idxs]) - distribution = env_data[f'sweep/{hyper}/distribution'][0] - #if 'uniform' in distribution: - # #filt = (filt - mmin) / (mmax - mmin) - # pass - if 'log' in distribution or 'pow2' in distribution: - filt = np.log(filt) - #filt = (np.log(filt) - np.log(mmin)) / (np.log(mmax) - np.log(mmin)) - - filtered[env].append(filt) - - filtered[env] = np.array(filtered[env]).T - - training = np.concatenate(list(filtered.values()), axis=0) - from sklearn.decomposition import PCA - pca = PCA(n_components=2) - pca.fit(training) - - all_x = [] - all_y = [] - all_z = [] - for i, env 
in enumerate(filtered): - if filtered[env].shape[0] == 0: - continue - - reduced = pca.transform(filtered[env]) - x, y = reduced[:, 0], reduced[:, 1] - all_x.append(x) - all_y.append(y) - all_z.append([env]*len(x)) - - all_x = np.concatenate(all_x) - all_y = np.concatenate(all_y) - all_z = np.concatenate(all_z) - f = px.scatter(x=all_x, y=all_y, color=all_z, color_discrete_sequence=roygbiv) + range1_mmin = min(EXPERIMENTS[xkey]) + range1_mmax = max(EXPERIMENTS[xkey]) + norm_x = (EXPERIMENTS[xkey] - range1_mmin) / (range1_mmax - range1_mmin) + range2_mmin = min(EXPERIMENTS[ykey]) + range2_mmax = max(EXPERIMENTS[ykey]) + norm_y = (EXPERIMENTS[ykey] - range2_mmin) / (range2_mmax - range2_mmin) + mask = (norm_x >= range1[0]) & (norm_x <= range1[1]) & (norm_y >= range2[0]) & (norm_y <= range2[1]) + filtered = EXPERIMENTS[mask] + + f = px.scatter( + x=filtered['tsne1'], + y=filtered['tsne2'], + color=filtered['env_name'], + color_discrete_sequence=roygbiv + ) f.update_traces(marker_size=10) layout_dict = { @@ -618,12 +872,12 @@ def update_pca_plot(thresh): 'height': 720, 'autosize': False, 'xaxis': dict( - title=dict(text='principal component 1', font=AXIS_FONT), + title=dict(text='TSNE-1', font=AXIS_FONT), tickfont=TICK_FONT, showgrid=False ), 'yaxis': dict( - title=dict(text='principal component 2', font=AXIS_FONT), + title=dict(text='TSNE-2', font=AXIS_FONT), tickfont=TICK_FONT, showgrid=False ) From 3e6dd21ac3b9aa2900f312a6aa4bc4583676d228 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 11 Oct 2025 15:06:25 +0000 Subject: [PATCH 052/188] constellation cache --- cache_data.py | 181 ++++ pufferlib/ocean/constellation/constellation.c | 840 ++++++++++++++++++ 2 files changed, 1021 insertions(+) create mode 100644 cache_data.py create mode 100644 pufferlib/ocean/constellation/constellation.c diff --git a/cache_data.py b/cache_data.py new file mode 100644 index 000000000..29312b6df --- /dev/null +++ b/cache_data.py @@ -0,0 +1,181 @@ +import numpy as np + +import 
json +import glob +import os + + +env_names = sorted([ + 'breakout', + 'impulse_wars', + 'pacman', + 'tetris', + 'g2048', + 'moba', + 'pong', + 'tower_climb', + 'grid', + 'nmmo3', + 'snake', + 'tripletriad' +]) + +HYPERS = [ + 'train/learning_rate', + 'train/ent_coef', + 'train/gamma', + 'train/gae_lambda', + 'train/vtrace_rho_clip', + 'train/vtrace_c_clip', + 'train/clip_coef', + 'train/vf_clip_coef', + 'train/vf_coef', + 'train/max_grad_norm', + 'train/adam_beta1', + 'train/adam_beta2', + 'train/adam_eps', + 'train/prio_alpha', + 'train/prio_beta0', + 'train/bptt_horizon', + 'train/num_minibatches', + 'train/minibatch_size', + 'policy/hidden_size', + 'env/num_envs', +] + +ALL_KEYS = [ + 'agent_steps', + 'cost', + 'environment/score', + 'environment/perf' +] + HYPERS + +def pareto_idx(steps, costs, scores): + idxs = [] + for i in range(len(steps)): + better = [scores[j] >= scores[i] and + costs[j] < costs[i] and steps[j] < steps[i] + for j in range(len(scores))] + if not any(better): + idxs.append(i) + + return idxs + +def load_sweep_data(path): + data = {} + keys = None + for fpath in glob.glob(path): + if 'cache.json' in fpath: + continue + + with open(fpath, 'r') as f: + exp = json.load(f) + + if not data: + for kk in exp.keys(): + if kk == 'data': + for k, v in exp[kk][-1].items(): + data[k] = [] + else: + data[kk] = [] + + discard = False + for kk in list(data.keys()): + if kk not in exp and kk not in exp['data'][-1]: + discard = True + break + + if discard: + continue + + for kk in list(data.keys()): + if kk in exp: + v = exp[kk] + sweep_key = f'sweep/{kk}/distribution' + if sweep_key in data and exp[sweep_key] == 'logit_normal': + v = 1 - v + elif kk in ('train/vtrace_rho_clip', 'train/vtrace_c_clip'): + v = max(v, 0.1) + + data[kk].append(v) + else: + data[kk].append(exp['data'][-1][kk]) + + steps = data['agent_steps'] + costs = data['cost'] + scores = data['environment/score'] + + idxs = pareto_idx(steps, costs, scores) + + # Filter to pareto + for k in 
data: + data[k] = [data[k][i] for i in idxs] + + # Monkey patch: Cap performance + data['environment/perf'] = [min(e, 1.0) for e in data['environment/perf']] + + # Monkey patch: Adjust steps by frameskip if present + if 'env/frameskip' in data: + skip = data['env/frameskip'] + data['agent_steps'] = [n*m for n, m in zip(data['agent_steps'], skip)] + + return data + +def cached_sweep_load(path, env_name): + cache_file = os.path.join(path, 'c_cache.json') + if not os.path.exists(cache_file): + data = load_sweep_data(os.path.join(path, '*.json')) + with open(cache_file, 'w') as f: + json.dump(data, f) + + with open(cache_file, 'r') as f: + data = json.load(f) + + print(f'Loaded {env_name}') + return data + +def compute_tsne(): + data = {name: cached_sweep_load(f'experiments/logs/puffer_{name}', name) for name in env_names} + + flat = [] + flat_mmin = [] + flat_mmax = [] + for env in env_names: + flat.append(np.stack([data[env][hyper] for hyper in HYPERS], axis=1)) + flat_mmin.append(np.stack([data[env][f'sweep/{hyper}/min'] for hyper in HYPERS], axis=1)) + flat_mmax.append(np.stack([data[env][f'sweep/{hyper}/max'] for hyper in HYPERS], axis=1)) + + flat_distribution = [data[env][f'sweep/{hyper}/distribution'] for env in env_names for hyper in HYPERS] + + flat = np.concatenate(flat, axis=0) + flat_mmin = np.concatenate(flat_mmin, axis=0).min(axis=0) + flat_mmax = np.concatenate(flat_mmax, axis=0).max(axis=0) + + normed = flat.copy() + for i in range(len(HYPERS)): + dist = flat_distribution[i] + if 'log' in dist or 'pow2' in dist: + flat_mmin[i] = np.log(flat_mmin[i]) + flat_mmax[i] = np.log(flat_mmax[i]) + normed[:, i] = np.log(flat[:, i]) + + normed[:, i] = (normed[:, i] - flat_mmin[i]) / (flat_mmax[i] - flat_mmin[i]) + + from sklearn.manifold import TSNE + proj = TSNE(n_components=2) + reduced = proj.fit_transform(normed) + + row = 0 + for env in env_names: + for i, hyper in enumerate(HYPERS): + sz = len(data[env][hyper]) + data[env][hyper] = normed[row:row+sz, 
i].tolist() + + data[env]['tsne1'] = reduced[row:row+sz, 0].tolist() + data[env]['tsne2'] = reduced[row:row+sz, 1].tolist() + row += sz + + json.dump(data, open('all_cache.json', 'w')) + +if __name__ == '__main__': + compute_tsne() diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c new file mode 100644 index 000000000..9aa7d0e9b --- /dev/null +++ b/pufferlib/ocean/constellation/constellation.c @@ -0,0 +1,840 @@ +#include +#include +#include +#include +#include "raylib.h" + + +#define RAYGUI_IMPLEMENTATION +#include "raygui.h" + +#include "cJSON.h" + +const Color PUFF_RED = (Color){187, 0, 0, 255}; +const Color PUFF_CYAN = (Color){0, 187, 187, 255}; +const Color PUFF_WHITE = (Color){241, 241, 241, 241}; +const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; + +const float EMPTY = -4242.0f; + +#define SEP 4 +#define SETTINGS_HEIGHT 20 +#define TOGGLE_WIDTH 60 +#define DROPDOWN_WIDTH 200 + +typedef struct { + char *key; + float *values; + int size; +} KeyValue; + + +typedef struct PlotArgs { + float x_min; + float x_max; + float y_min; + float y_max; + float z_min; + float z_max; + int width; + int height; + int title_font_size; + int axis_font_size; + int axis_tick_font_size; + int legend_font_size; + int line_width; + int tick_length; + int x_margin; + int y_margin; + Color font_color; + Color background_color; + Color axis_color; + char* x_label; + char* y_label; + char* z_label; + Font font; + Font font_small; +} PlotArgs; + +PlotArgs DEFAULT_PLOT_ARGS = { + .x_min = EMPTY, + .x_max = EMPTY, + .y_min = EMPTY, + .y_max = EMPTY, + .z_min = EMPTY, + .z_max = EMPTY, + .width = 960, + .height = 540 - SETTINGS_HEIGHT, + .title_font_size = 24, + .axis_font_size = 24, + .axis_tick_font_size = 12, + .legend_font_size = 12, + .line_width = 2, + .tick_length = 8, + .x_margin = 70, + .y_margin = 70, + .font_color = PUFF_WHITE, + .background_color = PUFF_BACKGROUND, + .axis_color = PUFF_WHITE, + .x_label = "Cost", + 
.y_label = "Score", + .z_label = "Train/Learning Rate", +}; + +const char* format_tick_label(double value) { + static char buffer[32]; + int precision = 2; + + if (fabs(value) < 1e-10) { + strcpy(buffer, "0"); + return buffer; + } + + if (fabs(value) < 0.01 || fabs(value) > 10000) { + snprintf(buffer, sizeof(buffer), "%.2e", value); + } else { + snprintf(buffer, sizeof(buffer), "%.*f", precision, value); + + char *end = buffer + strlen(buffer) - 1; + while (end > buffer && *end == '0') *end-- = '\0'; + if (end > buffer && *end == '.') *end = '\0'; + } + + return buffer; +} + +void draw_axes(PlotArgs args) { + int width = args.width; + int height = args.height; + + // Draw axes + DrawLine(args.x_margin, args.y_margin, + args.x_margin, height - args.y_margin, PUFF_WHITE); + DrawLine(args.x_margin, height - args.y_margin, + width - args.x_margin, height - args.y_margin, PUFF_WHITE); + + // X label + Vector2 x_font_size = MeasureTextEx(args.font, args.x_label, args.axis_font_size, 0); + DrawTextEx( + args.font, + args.x_label, + (Vector2){ + width/2 - x_font_size.x/2, + height - x_font_size.y, + }, + args.axis_font_size, + 0, + PUFF_WHITE + ); + + // Y label + Vector2 y_font_size = MeasureTextEx(args.font, args.y_label, args.axis_font_size, 0); + DrawTextPro( + args.font, + args.y_label, + (Vector2){ + 0, + height/2 + y_font_size.x/2 + }, + (Vector2){ 0, 0 }, + -90, + args.axis_font_size, + 0, + PUFF_WHITE + ); + + // Autofit number of ticks + Vector2 tick_label_size = MeasureTextEx(args.font, "estimate", args.axis_font_size, 0); + int num_x_ticks = (width - 2*args.x_margin)/tick_label_size.x; + int num_y_ticks = (height - 2*args.y_margin)/tick_label_size.x; + + // X ticks + for (int i=0; i max) max = ary[i]; + } + return max; +} + +void boxplot(float* mmin, float* mmax, bool log_x, int num_points, PlotArgs args) { + int width = args.width; + int height = args.height; + + // Find min/max for scaling + //float z_min = args.z_min == EMPTY ? 
ary_min(z, num_points) : args.z_min; + //float z_max = args.z_max == EMPTY ? ary_max(z, num_points) : args.z_max; + + float x_min = args.x_min; + float x_max = args.x_max; + + if (log_x) { + x_min = x_min<=1e-8 ? -8 : log10(x_min); + x_max = x_max<=1e-8 ? -8 : log10(x_max); + } + + float dx = x_max - x_min; + if (dx == 0) dx = 1.0f; + x_min -= 0.1f * dx; x_max += 0.1f * dx; + dx = x_max - x_min; + float dy = (height - 2*args.y_margin)/((float)num_points); + + for (int j=0; jx_min = 1e-8; + args->x_max = 1e8; + + for (int i=0; ix_min = fmin(args->x_min, values[j]); + //args->x_max = fmax(args->x_max, values[j]); + } + } +} + + + +int main(void) { + FILE *file = fopen("pufferlib/ocean/constellation/data.json", "r"); + if (!file) { + printf("Error opening file\n"); + return 1; + } + + fseek(file, 0, SEEK_END); + long file_size = ftell(file); + fseek(file, 0, SEEK_SET); + char *json_str = malloc(file_size + 1); + if (!json_str) { + printf("Memory allocation error\n"); + fclose(file); + return 1; + } + + // Read file into buffer + fread(json_str, 1, file_size, file); + json_str[file_size] = '\0'; + fclose(file); + + cJSON *root = cJSON_Parse(json_str); + if (!root) { + printf("JSON parse error: %s\n", cJSON_GetErrorPtr()); + free(json_str); + return 1; + } + + if (!cJSON_IsObject(root)) { + printf("Error: Root is not an object\n"); + return cleanup(NULL, 0, root, json_str); + } + + int map_count = 0; + cJSON *item = root->child; + while (item) { + map_count++; + item = item->next; + } + KeyValue *map = calloc(map_count, sizeof(KeyValue)); + if (!map) { + printf("Memory allocation error\n"); + return cleanup(NULL, 0, root, json_str); + } + + // Load all keys and their float arrays + int hyper_count = 0; + int idx = 0; + item = root->child; + while (item) { + map[idx].key = strdup(item->string); + if (strncmp(map[idx].key, "train", 5) == 0) { + hyper_count++; + } + if (!map[idx].key) { + printf("Memory allocation error for key\n"); + return cleanup(map, map_count, root, 
json_str); + } + + if (!cJSON_IsArray(item)) { + printf("Error: Value for key '%s' is not an array\n", map[idx].key); + return cleanup(map, map_count, root, json_str); + } + + int array_size = cJSON_GetArraySize(item); + map[idx].values = malloc(array_size * sizeof(float)); + if (!map[idx].values) { + printf("Memory allocation error for values\n"); + return cleanup(map, map_count, root, json_str); + } + + map[idx].size = array_size; + + for (int j = 0; j < array_size; j++) { + cJSON *sub = cJSON_GetArrayItem(item, j); + if (cJSON_IsNumber(sub)) { + map[idx].values[j] = (float)sub->valuedouble; + } else { + continue; + printf("Error: Non-number in array for key '%s' at index %d\n", map[idx].key, j); + return cleanup(map, map_count, root, json_str); + } + } + + idx++; + item = item->next; + } + + // Create items as an array of strings + //if (map_count > 100) { + // map_count = 100; + //} + char **items = malloc(map_count * sizeof(char *)); + if (!items) { + printf("Memory allocation error\n"); + return cleanup(map, map_count, root, json_str); + } + for (int i = 0; i < map_count; i++) { + items[i] = map[i].key; // Or strdup if you need copies + } + + // Create options as a semicolon-separated string + size_t options_len = 0; + for (int i = 0; i < map_count; i++) { + options_len += strlen(map[i].key) + 1; // +1 for semicolon or null + } + char *options = malloc(options_len); + if (!options) { + printf("Memory allocation error\n"); + free(items); + return cleanup(map, map_count, root, json_str); + } + options[0] = '\0'; + for (int i = 0; i < map_count; i++) { + if (i > 0) strcat(options, ";"); + strcat(options, map[i].key); + } + + // Hypers + + hyper_count = 5; + char *hyper_key[5] = {"train/learning_rate", "train/gamma", "train/gae_lambda", "train/ent_coef", "train/vf_coef"}; + KeyValue hypers[5]; + for (int i=0; i<5; i++) { + hypers[i].key = hyper_key[i]; + hypers[i].values = get_values(map, map_count, hyper_key[i], &hypers[i].size); + } + float *box_mmin = 
malloc(hyper_count * sizeof(float)); + float *box_mmax = malloc(hyper_count * sizeof(float)); + + // Example usage: Print the arrays + // Cleanup + //free(cost_array); + //free(score_array); + //cJSON_Delete(root); + //free(json_str); + + //float *x = cost_array; + //float *y = score_array; + //float num_points = cost_size; + + //float *x = malloc(num_points * sizeof(float)); + //float *y = malloc(num_points * sizeof(float)); + + + // Initialize Raylib + InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); + ClearBackground(PUFF_BACKGROUND); + SetTargetFPS(60); + + DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/Montserrat-Regular.ttf", 24, NULL, 255); + DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/Montserrat-Regular.ttf", 12, NULL, 255); + + Camera3D camera = (Camera3D){ 0 }; + camera.position = (Vector3){ 10.0f, 10.0f, 10.0f }; + camera.target = (Vector3){ 0.0f, 0.0f, 0.0f }; + camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; + camera.fovy = 45.0f; + camera.projection = CAMERA_PERSPECTIVE; + PlotArgs args1 = DEFAULT_PLOT_ARGS; + RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); + bool fig1_x_active = false; + int fig1_x_idx = 2; + bool fig1_x_log = true; + bool fig1_y_active = false; + int fig1_y_idx = 6; + bool fig1_y_log = false; + bool fig1_z_active = false; + int fig1_z_idx = 1; + bool fig1_z_log = true; + + PlotArgs args2 = DEFAULT_PLOT_ARGS; + RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); + bool fig2_x_active = false; + int fig2_x_idx = 1; + bool fig2_y_active = false; + int fig2_y_idx = 0; + + PlotArgs args3 = DEFAULT_PLOT_ARGS; + args3.x_margin = 250; + RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); + bool fig3_x_active = false; + int fig3_x_idx = 3; + bool fig3_x_log = true; + bool fig3_y_active = false; + int fig3_y_idx = 0; + + PlotArgs args4 = DEFAULT_PLOT_ARGS; + RenderTexture2D fig4 = LoadRenderTexture(args4.width, 
args4.height); + bool fig4_x_active = false; + int fig4_x_idx = 4; + bool fig4_y_active = false; + int fig4_y_idx = 0; + + //char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; + //char options[] = "environment/score;cost;train/learning_rate;train/gamma;train/gae_lambda"; + + float* x; + float* y; + float* z; + int num_points; + char* x_label; + char* y_label; + char* z_label; + + while (!WindowShouldClose()) { + BeginDrawing(); + ClearBackground(PUFF_BACKGROUND); + + x_label = items[fig1_x_idx]; + y_label = items[fig1_y_idx]; + z_label = items[fig1_z_idx]; + args1.x_label = x_label; + args1.y_label = y_label; + args1.z_label = z_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); + z = get_values(map, map_count, z_label, &num_points); + args1.x_min = ary_min(x, num_points); + args1.x_max = ary_max(x, num_points); + args1.y_min = ary_min(y, num_points); + args1.y_max = ary_max(y, num_points); + args1.z_min = ary_min(z, num_points); + args1.z_max = ary_max(z, num_points); + float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; + float y_mid = fig1_y_log ? (log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; + float z_mid = fig1_z_log ? 
(log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; + camera.target = (Vector3){x_mid, y_mid, z_mid}; + BeginTextureMode(fig1); + ClearBackground(PUFF_BACKGROUND); + BeginMode3D(camera); + UpdateCamera(&camera, CAMERA_ORBITAL); + plot3(x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, num_points, args1); + draw_axes3(args1); + EndMode3D(); + EndTextureMode(); + DrawTextureRec( + fig1.texture, + (Rectangle){0, 0, fig1.texture.width, -fig1.texture.height }, + (Vector2){ 0, SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig1_x_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_x_rect, options, &fig1_x_idx, fig1_x_active)){ + fig1_x_active = !fig1_x_active; + } + Rectangle fig1_x_check_rect = {DROPDOWN_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig1_x_check_rect, "Log X", &fig1_x_log); + Rectangle fig1_y_rect = {DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_y_rect, options, &fig1_y_idx, fig1_y_active)){ + fig1_y_active = !fig1_y_active; + } + Rectangle fig1_y_check_rect = {2*DROPDOWN_WIDTH+TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig1_y_check_rect, "Log Y", &fig1_y_log); + Rectangle fig1_z_rect = {2*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_z_rect, options, &fig1_z_idx, fig1_z_active)){ + fig1_z_active = !fig1_z_active; + } + Rectangle fig1_z_check_rect = {3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig1_z_check_rect, "Log Z", &fig1_z_log); + + x_label = items[fig2_x_idx]; + y_label = items[fig2_y_idx]; + args2.x_label = x_label; + args2.y_label = y_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); + args2.x_min = ary_min(x, num_points); + args2.x_max = ary_max(x, num_points); + args2.y_min = ary_min(y, num_points); + args2.y_max = ary_max(y, num_points); + 
BeginTextureMode(fig2); + ClearBackground(PUFF_BACKGROUND); + plot(x, y, num_points, args2); + draw_axes(args2); + EndTextureMode(); + DrawTextureRec( + fig2.texture, + (Rectangle){ 0, 0, fig2.texture.width, -fig2.texture.height }, + (Vector2){ fig1.texture.width, SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig2_x_rect = {fig1.texture.width, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig2_x_rect, options, &fig2_x_idx, fig2_x_active)){ + fig2_x_active = !fig2_x_active; + } + Rectangle fig2_y_rect = {fig1.texture.width + DROPDOWN_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig2_y_rect, options, &fig2_y_idx, fig2_y_active)){ + fig2_y_active = !fig2_y_active; + } + + compute_boxplot_data(hypers, box_mmin, box_mmax, hyper_count, &args3); + args3.x_label = "Value"; + args3.y_label = "Hyperparameter"; + BeginTextureMode(fig3); + ClearBackground(PUFF_BACKGROUND); + boxplot(box_mmin, box_mmax, fig3_x_log, hyper_count, args3); + //draw_axes(args3); + draw_box_axes(hypers, hyper_count, args3); + EndTextureMode(); + DrawTextureRec( + fig3.texture, + (Rectangle){ 0, 0, fig3.texture.width, -fig3.texture.height }, + (Vector2){ 0, fig1.texture.height + 2*SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig3_x_rect = {0, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig3_x_rect, options, &fig3_x_idx, fig3_x_active)){ + fig3_x_active = !fig3_x_active; + } + Rectangle fig3_y_rect = {DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig3_y_rect, options, &fig3_y_idx, fig3_y_active)){ + fig3_y_active = !fig3_y_active; + } + + x_label = items[fig4_x_idx]; + y_label = items[fig4_y_idx]; + args4.x_label = x_label; + args4.y_label = y_label; + x = get_values(map, map_count, x_label, &num_points); + y = get_values(map, map_count, y_label, &num_points); + args4.x_min = ary_min(x, num_points); + args4.x_max = ary_max(x, num_points); + args4.y_min = ary_min(y, 
num_points); + args4.y_max = ary_max(y, num_points); + BeginTextureMode(fig4); + ClearBackground(PUFF_BACKGROUND); + plot(x, y, num_points, args4); + draw_axes(args4); + EndTextureMode(); + DrawTextureRec( + fig4.texture, + (Rectangle){ 0, 0, fig4.texture.width, -fig4.texture.height }, + (Vector2){ fig1.texture.width, fig1.texture.height + 2*SETTINGS_HEIGHT }, WHITE + ); + Rectangle fig4_x_rect = {fig1.texture.width, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig4_x_rect, options, &fig4_x_idx, fig4_x_active)){ + fig4_x_active = !fig4_x_active; + } + Rectangle fig4_y_rect = {fig1.texture.width + DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig4_y_rect, options, &fig4_y_idx, fig4_y_active)){ + fig4_y_active = !fig4_y_active; + } + + DrawFPS(GetScreenWidth() - 95, 10); + EndDrawing(); + } + + //free(x); + //free(y); + CloseWindow(); + return 0; +} From 6036f8175d028f785ea1e787f969f193f9860c82 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 11 Oct 2025 18:07:28 +0000 Subject: [PATCH 053/188] Prototype in color --- cache_data.py | 4 + pufferlib/ocean/constellation/constellation.c | 470 ++++++++++-------- 2 files changed, 258 insertions(+), 216 deletions(-) diff --git a/cache_data.py b/cache_data.py index 29312b6df..d1d5c0a13 100644 --- a/cache_data.py +++ b/cache_data.py @@ -167,10 +167,14 @@ def compute_tsne(): row = 0 for env in env_names: + ''' for i, hyper in enumerate(HYPERS): sz = len(data[env][hyper]) data[env][hyper] = normed[row:row+sz, i].tolist() + ''' + sz = len(data[env]['agent_steps']) + data[env] = {k: v for k, v in data[env].items() if k in ALL_KEYS} data[env]['tsne1'] = reduced[row:row+sz, 0].tolist() data[env]['tsne2'] = reduced[row:row+sz, 1].tolist() row += sz diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 9aa7d0e9b..dbd6efc27 100644 --- 
a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -1,10 +1,11 @@ #include +#include #include #include #include +#include #include "raylib.h" - #define RAYGUI_IMPLEMENTATION #include "raygui.h" @@ -15,6 +16,12 @@ const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; +Color COLORS[] = { + DARKGRAY, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, + GRAY, RED, GOLD, LIME, BLUE, VIOLET, BROWN, LIGHTGRAY, PINK, YELLOW, + GREEN, SKYBLUE, PURPLE, BEIGE +}; + const float EMPTY = -4242.0f; #define SEP 4 @@ -24,10 +31,36 @@ const float EMPTY = -4242.0f; typedef struct { char *key; - float *values; - int size; -} KeyValue; + float *ary; + int n; +} Hyper; +typedef struct { + char *key; + Hyper *hypers; + int n; +} Env; + +typedef struct { + Env *envs; + int n; +} Dataset; + +Hyper* get_hyper(Dataset *data, char *env, char* hyper) { + for (int i = 0; i < data->n; i++) { + if (strcmp(data->envs[i].key, env) != 0) { + continue; + } + for (int j = 0; j < data->envs[i].n; j++) { + if (strcmp(data->envs[i].hypers[j].key, hyper) == 0) { + return &data->envs[i].hypers[j]; + } + } + } + printf("Error: hyper %s not found in env %s\n", hyper, env); + exit(1); + return NULL; +} typedef struct PlotArgs { float x_min; @@ -203,7 +236,7 @@ void draw_axes(PlotArgs args) { } } -void draw_box_axes(KeyValue *hypers, int hyper_count, PlotArgs args) { +void draw_box_axes(char* hypers[], int hyper_count, PlotArgs args) { int width = args.width; int height = args.height; @@ -277,7 +310,7 @@ void draw_box_axes(KeyValue *hypers, int hyper_count, PlotArgs args) { // Y ticks for (int i=0; in; env++) { + for (int hyper=0; hyperenvs[env].n; hyper++) { + if (strcmp(data->envs[env].hypers[hyper].key, key) != 0) { + continue; + } + float val = data->envs[env].hypers[hyper].ary[0]; + if (val < mmin){ + mmin = val; + } + } + } + return mmin; 
+} + +float hyper_max(Dataset *data, char* key) { + float mmax = FLT_MIN; + for (int env=0; envn; env++) { + for (int hyper=0; hyperenvs[env].n; hyper++) { + if (strcmp(data->envs[env].hypers[hyper].key, key) != 0) { + continue; + } + float val = data->envs[env].hypers[hyper].ary[0]; + if (val > mmax){ + mmax = val; + } + } + } + return mmax; +} + +/* +float hyper_max(Hyper *hyper) { + float max = hyper->ary[0]; + for (int i=1; in; i++) { + if (hyper->ary[i] > max) max = hyper->ary[i]; + } + return max; +} + float ary_min(float* ary, int num) { float min = ary[0]; for (int i=1; iary; + float mmin = ary[0]; + float mmax = ary[0]; + for (int j=0; jn; j++) { + mmin = fmin(mmin, ary[j]); + mmax = fmax(mmax, ary[j]); + } if (log_x) { - x1 = x1 <= 0 ? 0 : log10(x1); - x2 = x2 <= 0 ? 0 : log10(x2); + mmin = mmin <= 0 ? 0 : log10(mmin); + mmax = mmax <= 0 ? 0 : log10(mmax); } - float left = args.x_margin + (x1 - x_min)/(x_max - x_min)*(width - 2*args.x_margin); - float right = args.x_margin + (x2 - x_min)/(x_max - x_min)*(width - 2*args.x_margin); - DrawRectangle(left, args.y_margin + j*dy, right - left, dy, PUFF_CYAN); + float left = args.x_margin + (mmin - x_min)/(x_max - x_min)*(width - 2*args.x_margin); + float right = args.x_margin + (mmax - x_min)/(x_max - x_min)*(width - 2*args.x_margin); + DrawRectangle(left, args.y_margin + i*dy, right - left, dy, color); } } -//void drift_plot(KeyValue *hypers, int hyper_count, PlotArgs args) { +void plot(Hyper* x, Hyper* y, PlotArgs args, Color color) { + assert(x->n == y->n); -void plot(float* x, float* y, int num_points, PlotArgs args) { int width = args.width; int height = args.height; @@ -399,19 +478,20 @@ void plot(float* x, float* y, int num_points, PlotArgs args) { dy = y_max - y_min; // Plot lines - for (int j = 0; j < num_points - 1; j++) { - float x1 = args.x_margin + (x[j] - x_min) / dx * (width - 2*args.x_margin); - float y1 = (height - args.y_margin) - (y[j] - y_min) / dy * (height - 2*args.y_margin); + for (int 
i=0; in; i++) { + float xi = args.x_margin + (x->ary[i] - x_min) / dx * (width - 2*args.x_margin); + float yi = (height - args.y_margin) - (y->ary[i] - y_min) / dy * (height - 2*args.y_margin); + DrawCircle(xi, yi, args.line_width, color); /* float x2 = args.margin + (x[j + 1] - x_min) / dx * (width - 2*args.margin); float y2 = (height - args.margin) - (y[j + 1] - y_min) / dy * (height - 2*args.margin); DrawLine(x1, y1, x2, y2, PUFF_CYAN); */ - DrawCircle(x1, y1, args.line_width, PUFF_CYAN); } } -void plot3(float* x, float* y, float* z, bool log_x, bool log_y, bool log_z, int num_points, PlotArgs args) { +void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, Color color) { + assert(x->n == y->n && x->n == z->n); int width = args.width; int height = args.height; @@ -436,30 +516,21 @@ void plot3(float* x, float* y, float* z, bool log_x, bool log_y, bool log_z, int dz = z_max - z_min; // Plot lines - for (int j = 0; j < num_points - 1; j++) { - float xj = (log_x) ? log10(x[j]) : x[j]; - float yj = (log_y) ? log10(y[j]) : y[j]; - float zj = (log_z) ? log10(z[j]) : z[j]; - DrawSphere((Vector3){xj, yj, zj}, 0.1f, PUFF_CYAN); + for (int j = 0; j < x->n; j++) { + float xj = (log_x) ? log10(x->ary[j]) : x->ary[j]; + float yj = (log_y) ? log10(y->ary[j]) : y->ary[j]; + float zj = (log_z) ? 
log10(z->ary[j]) : z->ary[j]; + //DrawSphere((Vector3){xj, yj, zj}, 0.1f, color); + DrawCube((Vector3){xj, yj, zj}, 0.1f, 0.1f, 0.1f, color); } } -float* get_values(KeyValue *map, int map_count, char *search_key, int *out_size) { - for (int i = 0; i < map_count; i++) { - if (map[i].key && strcmp(map[i].key, search_key) == 0) { - *out_size = map[i].size; - return map[i].values; - } - } - return NULL; -} - -int cleanup(KeyValue *map, int map_count, cJSON *root, char *json_str) { +int cleanup(Hyper *map, int map_count, cJSON *root, char *json_str) { if (map) { for (int i=0; ix_min = 1e-8; - args->x_max = 1e8; - - for (int i=0; ix_min = fmin(args->x_min, values[j]); - //args->x_max = fmax(args->x_max, values[j]); - } - } -} - - int main(void) { - FILE *file = fopen("pufferlib/ocean/constellation/data.json", "r"); + FILE *file = fopen("pufferlib/ocean/constellation/all_cache.json", "r"); if (!file) { printf("Error opening file\n"); return 1; } + // Read in file fseek(file, 0, SEEK_END); long file_size = ftell(file); fseek(file, 0, SEEK_SET); char *json_str = malloc(file_size + 1); - if (!json_str) { - printf("Memory allocation error\n"); - fclose(file); - return 1; - } - - // Read file into buffer fread(json_str, 1, file_size, file); json_str[file_size] = '\0'; fclose(file); - cJSON *root = cJSON_Parse(json_str); if (!root) { printf("JSON parse error: %s\n", cJSON_GetErrorPtr()); free(json_str); return 1; } - if (!cJSON_IsObject(root)) { printf("Error: Root is not an object\n"); return cleanup(NULL, 0, root, json_str); } - int map_count = 0; - cJSON *item = root->child; - while (item) { - map_count++; - item = item->next; + // Load in dataset + Dataset data = {NULL, 0}; + cJSON *json_env = root->child; + while (json_env) { + data.n++; + json_env = json_env->next; } - KeyValue *map = calloc(map_count, sizeof(KeyValue)); - if (!map) { - printf("Memory allocation error\n"); - return cleanup(NULL, 0, root, json_str); - } - - // Load all keys and their float arrays - int 
hyper_count = 0; - int idx = 0; - item = root->child; - while (item) { - map[idx].key = strdup(item->string); - if (strncmp(map[idx].key, "train", 5) == 0) { - hyper_count++; - } - if (!map[idx].key) { - printf("Memory allocation error for key\n"); - return cleanup(map, map_count, root, json_str); - } - - if (!cJSON_IsArray(item)) { - printf("Error: Value for key '%s' is not an array\n", map[idx].key); - return cleanup(map, map_count, root, json_str); - } - int array_size = cJSON_GetArraySize(item); - map[idx].values = malloc(array_size * sizeof(float)); - if (!map[idx].values) { - printf("Memory allocation error for values\n"); - return cleanup(map, map_count, root, json_str); - } - - map[idx].size = array_size; - - for (int j = 0; j < array_size; j++) { - cJSON *sub = cJSON_GetArrayItem(item, j); - if (cJSON_IsNumber(sub)) { - map[idx].values[j] = (float)sub->valuedouble; + Env *envs = calloc(data.n, sizeof(Env)); + data.envs = envs; + json_env = root->child; + for (int i=0; ichild; + int hyper_points = 0; + while (json_hyper) { + envs[i].n++; + envs[i].key = strdup(json_env->string); + int nxt_hyper_points = cJSON_GetArraySize(json_hyper); + if (hyper_points == 0) { + hyper_points = nxt_hyper_points; } else { - continue; - printf("Error: Non-number in array for key '%s' at index %d\n", map[idx].key, j); - return cleanup(map, map_count, root, json_str); + assert(hyper_points == nxt_hyper_points); + } + json_hyper = json_hyper->next; + } + envs[i].hypers = calloc(envs[i].n, sizeof(Hyper)); + for (int j=0; jstring); + envs[i].hypers[j].ary = calloc(hyper_points, sizeof(float)); + int n = cJSON_GetArraySize(json_hyper); + envs[i].hypers[j].n = n; + for (int k = 0; k < n; k++) { + cJSON *sub = cJSON_GetArrayItem(json_hyper, k); + if (cJSON_IsNumber(sub)) { + envs[i].hypers[j].ary[k] = (float)sub->valuedouble; + } else { + continue; + //printf("Error: Non-number in array for key '%s' at index %d\n", map[idx].key, j); + } } } - - idx++; - item = item->next; } - // 
Create items as an array of strings - //if (map_count > 100) { - // map_count = 100; - //} - char **items = malloc(map_count * sizeof(char *)); - if (!items) { - printf("Memory allocation error\n"); - return cleanup(map, map_count, root, json_str); - } - for (int i = 0; i < map_count; i++) { - items[i] = map[i].key; // Or strdup if you need copies - } + int hyper_count = 9; + char *hyper_key[9] = { + "agent_steps", "cost", "environment/perf", "environment/score", + "train/learning_rate", "train/gamma", "train/gae_lambda", "train/ent_coef", "train/vf_coef" + }; + //char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; + //char options[] = "environment/score;cost;train/learning_rate;train/gamma;train/gae_lambda"; + // Create options as a semicolon-separated string size_t options_len = 0; - for (int i = 0; i < map_count; i++) { - options_len += strlen(map[i].key) + 1; // +1 for semicolon or null + for (int i = 0; i < hyper_count; i++) { + options_len += strlen(hyper_key[i]) + 1; } char *options = malloc(options_len); - if (!options) { - printf("Memory allocation error\n"); - free(items); - return cleanup(map, map_count, root, json_str); - } options[0] = '\0'; - for (int i = 0; i < map_count; i++) { + for (int i = 0; i < hyper_count; i++) { if (i > 0) strcat(options, ";"); - strcat(options, map[i].key); - } - - // Hypers - - hyper_count = 5; - char *hyper_key[5] = {"train/learning_rate", "train/gamma", "train/gae_lambda", "train/ent_coef", "train/vf_coef"}; - KeyValue hypers[5]; - for (int i=0; i<5; i++) { - hypers[i].key = hyper_key[i]; - hypers[i].values = get_values(map, map_count, hyper_key[i], &hypers[i].size); + strcat(options, hyper_key[i]); } - float *box_mmin = malloc(hyper_count * sizeof(float)); - float *box_mmax = malloc(hyper_count * sizeof(float)); - // Example usage: Print the arrays - // Cleanup - //free(cost_array); - //free(score_array); - //cJSON_Delete(root); - //free(json_str); - - //float *x = 
cost_array; - //float *y = score_array; - //float num_points = cost_size; - - //float *x = malloc(num_points * sizeof(float)); - //float *y = malloc(num_points * sizeof(float)); - - // Initialize Raylib InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); ClearBackground(PUFF_BACKGROUND); @@ -650,10 +648,10 @@ int main(void) { PlotArgs args1 = DEFAULT_PLOT_ARGS; RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); bool fig1_x_active = false; - int fig1_x_idx = 2; + int fig1_x_idx = 0; bool fig1_x_log = true; bool fig1_y_active = false; - int fig1_y_idx = 6; + int fig1_y_idx = 2; bool fig1_y_log = false; bool fig1_z_active = false; int fig1_z_idx = 1; @@ -664,7 +662,7 @@ int main(void) { bool fig2_x_active = false; int fig2_x_idx = 1; bool fig2_y_active = false; - int fig2_y_idx = 0; + int fig2_y_idx = 2; PlotArgs args3 = DEFAULT_PLOT_ARGS; args3.x_margin = 250; @@ -677,17 +675,17 @@ int main(void) { PlotArgs args4 = DEFAULT_PLOT_ARGS; RenderTexture2D fig4 = LoadRenderTexture(args4.width, args4.height); + float *box_mmin = malloc(hyper_count * sizeof(float)); + float *box_mmax = malloc(hyper_count * sizeof(float)); bool fig4_x_active = false; int fig4_x_idx = 4; + bool fig4_x_log = true; bool fig4_y_active = false; int fig4_y_idx = 0; - //char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; - //char options[] = "environment/score;cost;train/learning_rate;train/gamma;train/gae_lambda"; - - float* x; - float* y; - float* z; + Hyper* x; + Hyper* y; + Hyper* z; int num_points; char* x_label; char* y_label; @@ -697,21 +695,18 @@ int main(void) { BeginDrawing(); ClearBackground(PUFF_BACKGROUND); - x_label = items[fig1_x_idx]; - y_label = items[fig1_y_idx]; - z_label = items[fig1_z_idx]; + x_label = hyper_key[fig1_x_idx]; + y_label = hyper_key[fig1_y_idx]; + z_label = hyper_key[fig1_z_idx]; args1.x_label = x_label; args1.y_label = y_label; 
args1.z_label = z_label; - x = get_values(map, map_count, x_label, &num_points); - y = get_values(map, map_count, y_label, &num_points); - z = get_values(map, map_count, z_label, &num_points); - args1.x_min = ary_min(x, num_points); - args1.x_max = ary_max(x, num_points); - args1.y_min = ary_min(y, num_points); - args1.y_max = ary_max(y, num_points); - args1.z_min = ary_min(z, num_points); - args1.z_max = ary_max(z, num_points); + args1.x_min = hyper_min(&data, hyper_key[fig1_x_idx]); + args1.x_max = hyper_max(&data, hyper_key[fig1_x_idx]); + args1.y_min = hyper_min(&data, hyper_key[fig1_y_idx]); + args1.y_max = hyper_max(&data, hyper_key[fig1_y_idx]); + args1.z_min = hyper_min(&data, hyper_key[fig1_z_idx]); + args1.z_max = hyper_max(&data, hyper_key[fig1_z_idx]); float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; float y_mid = fig1_y_log ? (log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; float z_mid = fig1_z_log ? 
(log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; @@ -720,7 +715,15 @@ int main(void) { ClearBackground(PUFF_BACKGROUND); BeginMode3D(camera); UpdateCamera(&camera, CAMERA_ORBITAL); - plot3(x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, num_points, args1); + + for (int i=0; i Date: Sat, 11 Oct 2025 20:20:40 +0000 Subject: [PATCH 054/188] UI --- pufferlib/ocean/constellation/constellation.c | 236 ++++++++++++++---- 1 file changed, 183 insertions(+), 53 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index dbd6efc27..a4b6223eb 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -357,13 +357,16 @@ void draw_axes3(PlotArgs args) { float hyper_min(Dataset *data, char* key) { float mmin = FLT_MAX; for (int env=0; envn; env++) { - for (int hyper=0; hyperenvs[env].n; hyper++) { - if (strcmp(data->envs[env].hypers[hyper].key, key) != 0) { + for (int i=0; ienvs[env].n; i++) { + Hyper* hyper = &data->envs[env].hypers[i]; + if (strcmp(hyper->key, key) != 0) { continue; } - float val = data->envs[env].hypers[hyper].ary[0]; - if (val < mmin){ - mmin = val; + for (int j=0; jn; j++) { + float val = hyper->ary[j]; + if (val < mmin){ + mmin = val; + } } } } @@ -371,21 +374,25 @@ float hyper_min(Dataset *data, char* key) { } float hyper_max(Dataset *data, char* key) { - float mmax = FLT_MIN; - for (int env=0; envn; env++) { - for (int hyper=0; hyperenvs[env].n; hyper++) { - if (strcmp(data->envs[env].hypers[hyper].key, key) != 0) { + float mmax = -FLT_MAX; + for (int i=0; in; i++) { + for (int j=0; jenvs[i].n; j++) { + Hyper* hyper = &data->envs[i].hypers[j]; + if (strcmp(hyper->key, key) != 0) { continue; } - float val = data->envs[env].hypers[hyper].ary[0]; - if (val > mmax){ - mmax = val; + for (int k=0; kn; k++) { + float val = hyper->ary[k]; + if (val > mmax){ + mmax = val; + } } } } return mmax; } + /* float 
hyper_max(Hyper *hyper) { float max = hyper->ary[0]; @@ -412,7 +419,8 @@ float ary_max(float* ary, int num) { */ -void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_count, PlotArgs args, Color color) { +void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_count, PlotArgs args, Color color, + Hyper* filter1, float f1min, float f1max, Hyper* filter2, float f2min, float f2max) { int width = args.width; int height = args.height; @@ -438,6 +446,15 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ float mmin = ary[0]; float mmax = ary[0]; for (int j=0; jn; j++) { + float f1 = filter1->ary[j]; + if (f1 < f1min || f1 > f1max) { + continue; + } + float f2 = filter2->ary[j]; + if (f2 < f2min || f2 > f2max) { + continue; + } + mmin = fmin(mmin, ary[j]); mmax = fmax(mmax, ary[j]); } @@ -453,16 +470,36 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ } } -void plot(Hyper* x, Hyper* y, PlotArgs args, Color color) { +void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color) { assert(x->n == y->n); int width = args.width; int height = args.height; + float x_min = args.x_min; + float x_max = args.x_max; + float y_min = args.y_min; + float y_max = args.y_max; + + float dx = x_max - x_min; + float dy = y_max - y_min; + for (int i=0; in; i++) { + float xi = log_x ? log10(x->ary[i]) : x->ary[i]; + float yi = log_y ? log10(y->ary[i]) : y->ary[i]; + xi = args.x_margin + (xi - x_min) / dx * (width - 2*args.x_margin); + yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2*args.y_margin); + if (xi < args.x_margin) { + int s = 2; + } + DrawCircle(xi, yi, args.line_width, color); + } +} - // Find min/max for scaling - //float z_min = args.z_min == EMPTY ? ary_min(z, num_points) : args.z_min; - //float z_max = args.z_max == EMPTY ? 
ary_max(z, num_points) : args.z_max; +void plot_filtered(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color, + Hyper* filter1, float f1min, float f1max, Hyper* filter2, float f2min, float f2max) { + assert(x->n == y->n); + int width = args.width; + int height = args.height; float x_min = args.x_min; float x_max = args.x_max; float y_min = args.y_min; @@ -470,26 +507,29 @@ void plot(Hyper* x, Hyper* y, PlotArgs args, Color color) { float dx = x_max - x_min; float dy = y_max - y_min; - if (dx == 0) dx = 1.0f; - if (dy == 0) dy = 1.0f; - x_min -= 0.1f * dx; x_max += 0.1f * dx; - y_min -= 0.1f * dy; y_max += 0.1f * dy; - dx = x_max - x_min; - dy = y_max - y_min; - // Plot lines for (int i=0; in; i++) { - float xi = args.x_margin + (x->ary[i] - x_min) / dx * (width - 2*args.x_margin); - float yi = (height - args.y_margin) - (y->ary[i] - y_min) / dy * (height - 2*args.y_margin); + float f1 = filter1->ary[i]; + if (f1 < f1min || f1 > f1max) { + continue; + } + float f2 = filter2->ary[i]; + if (f2 < f2min || f2 > f2max) { + continue; + } + + float xi = log_x ? log10(x->ary[i]) : x->ary[i]; + float yi = log_y ? 
log10(y->ary[i]) : y->ary[i]; + xi = args.x_margin + (xi - x_min) / dx * (width - 2*args.x_margin); + yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2*args.y_margin); + if (xi < args.x_margin) { + int s = 2; + } DrawCircle(xi, yi, args.line_width, color); - /* - float x2 = args.margin + (x[j + 1] - x_min) / dx * (width - 2*args.margin); - float y2 = (height - args.margin) - (y[j + 1] - y_min) / dy * (height - 2*args.margin); - DrawLine(x1, y1, x2, y2, PUFF_CYAN); - */ } } + void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, Color color) { assert(x->n == y->n && x->n == z->n); int width = args.width; @@ -640,7 +680,7 @@ int main(void) { DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/Montserrat-Regular.ttf", 12, NULL, 255); Camera3D camera = (Camera3D){ 0 }; - camera.position = (Vector3){ 10.0f, 10.0f, 10.0f }; + camera.position = (Vector3){ 5.0f, 5.0f, 5.0f }; camera.target = (Vector3){ 0.0f, 0.0f, 0.0f }; camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; camera.fovy = 45.0f; @@ -661,27 +701,43 @@ int main(void) { RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); bool fig2_x_active = false; int fig2_x_idx = 1; + bool fig2_x_log = true; bool fig2_y_active = false; int fig2_y_idx = 2; + bool fig2_y_log = false; PlotArgs args3 = DEFAULT_PLOT_ARGS; - args3.x_margin = 250; RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); - bool fig3_x_active = false; - int fig3_x_idx = 3; - bool fig3_x_log = true; - bool fig3_y_active = false; - int fig3_y_idx = 0; + bool fig3_range1_active = false; + int fig3_range1_idx = 2; + char fig3_range1_min[32]; + char fig3_range1_max[32]; + float fig3_range1_min_val = 0; + float fig3_range1_max_val = 1; + bool fig3_range2_active = false; + int fig3_range2_idx = 1; + char fig3_range2_min[32]; + char fig3_range2_max[32]; + float fig3_range2_min_val = 0; + float fig3_range2_max_val = 10000; PlotArgs args4 = DEFAULT_PLOT_ARGS; RenderTexture2D fig4 = 
LoadRenderTexture(args4.width, args4.height); float *box_mmin = malloc(hyper_count * sizeof(float)); float *box_mmax = malloc(hyper_count * sizeof(float)); - bool fig4_x_active = false; - int fig4_x_idx = 4; bool fig4_x_log = true; - bool fig4_y_active = false; - int fig4_y_idx = 0; + bool fig4_range1_active = false; + int fig4_range1_idx = 2; + char fig4_range1_min[32]; + char fig4_range1_max[32]; + float fig4_range1_min_val = 0; + float fig4_range1_max_val = 1; + bool fig4_range2_active = false; + int fig4_range2_idx = 1; + char fig4_range2_min[32]; + char fig4_range2_max[32]; + float fig4_range2_min_val = 0; + float fig4_range2_max_val = 10000; Hyper* x; Hyper* y; @@ -691,10 +747,16 @@ int main(void) { char* y_label; char* z_label; + Vector2 focus = {0, 0}; + while (!WindowShouldClose()) { BeginDrawing(); ClearBackground(PUFF_BACKGROUND); + if (IsMouseButtonPressed(MOUSE_LEFT_BUTTON)) { + focus = GetMousePosition(); + } + x_label = hyper_key[fig1_x_idx]; y_label = hyper_key[fig1_y_idx]; z_label = hyper_key[fig1_z_idx]; @@ -760,6 +822,10 @@ int main(void) { args2.x_max = hyper_max(&data, hyper_key[fig2_x_idx]); args2.y_min = hyper_min(&data, hyper_key[fig2_y_idx]); args2.y_max = hyper_max(&data, hyper_key[fig2_y_idx]); + args2.x_min = (fig2_x_log) ? log10(args2.x_min) : args2.x_min; + args2.x_max = (fig2_x_log) ? log10(args2.x_max) : args2.x_max; + args2.y_min = (fig2_y_log) ? log10(args2.y_min) : args2.y_min; + args2.y_max = (fig2_y_log) ? 
log10(args2.y_max) : args2.y_max; BeginTextureMode(fig2); ClearBackground(PUFF_BACKGROUND); @@ -767,7 +833,7 @@ int main(void) { char* env = data.envs[i].key; x = get_hyper(&data, env, hyper_key[fig2_x_idx]); y = get_hyper(&data, env, hyper_key[fig2_y_idx]); - plot(x, y, args2, COLORS[i]); + plot(x, y, fig2_x_log, fig2_y_log, args2, COLORS[i]); } draw_axes(args2); EndTextureMode(); @@ -780,15 +846,18 @@ int main(void) { if (GuiDropdownBox(fig2_x_rect, options, &fig2_x_idx, fig2_x_active)){ fig2_x_active = !fig2_x_active; } - Rectangle fig2_y_rect = {fig1.texture.width + DROPDOWN_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + Rectangle fig2_x_check_rect = {fig1.texture.width + DROPDOWN_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig2_x_check_rect, "Log X", &fig2_x_log); + Rectangle fig2_y_rect = {fig1.texture.width + DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; if (GuiDropdownBox(fig2_y_rect, options, &fig2_y_idx, fig2_y_active)){ fig2_y_active = !fig2_y_active; } + Rectangle fig2_y_check_rect = {fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; + GuiCheckBox(fig2_y_check_rect, "Log Y", &fig2_y_log); // Figure 3 args3.x_label = "tsne1"; args3.y_label = "tsne2"; - args3.y_label = y_label; args3.x_min = hyper_min(&data, "tsne1"); args3.x_max = hyper_max(&data, "tsne1"); args3.y_min = hyper_min(&data, "tsne2"); @@ -800,7 +869,13 @@ int main(void) { char* env = data.envs[i].key; x = get_hyper(&data, env, "tsne1"); y = get_hyper(&data, env, "tsne2"); - plot(x, y, args3, COLORS[i]); + //plot(x, y, false, false, args3, COLORS[i]); + Hyper* filter1 = get_hyper(&data, env, hyper_key[fig3_range1_idx]); + Hyper* filter2 = get_hyper(&data, env, hyper_key[fig3_range2_idx]); + plot_filtered(x, y, false, false, args3, COLORS[i], + filter1, fig3_range1_min_val, fig3_range1_max_val, + filter2, fig3_range2_min_val, fig3_range2_max_val + ); } draw_axes(args3); EndTextureMode(); @@ -809,6 +884,36 @@ int 
main(void) { (Rectangle){ 0, 0, fig3.texture.width, -fig3.texture.height }, (Vector2){ 0, SETTINGS_HEIGHT + fig1.texture.height }, WHITE ); + Rectangle fig3_range1_rect = {0, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig3_range1_rect, options, &fig3_range1_idx, fig3_range1_active)){ + fig3_range1_active = !fig3_range1_active; + } + Rectangle fig3_range1_min_rect = {DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; + bool active = CheckCollisionPointRec(focus, fig3_range1_min_rect); + if (GuiTextBox(fig3_range1_min_rect, fig3_range1_min, 32, active)) { + fig3_range1_min_val = atof(fig3_range1_min); + } + Rectangle fig3_range1_max_rect = {1.5*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; + active = CheckCollisionPointRec(focus, fig3_range1_max_rect); + if (GuiTextBox(fig3_range1_max_rect, fig3_range1_max, 32, active)) { + fig3_range1_max_val = atof(fig3_range1_max); + } + Rectangle fig3_range2_rect = {2*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig3_range2_rect, options, &fig3_range2_idx, fig3_range2_active)){ + fig3_range2_active = !fig3_range2_active; + } + Rectangle fig3_range2_min_rect = {3*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; + active = CheckCollisionPointRec(focus, fig3_range2_min_rect); + if (GuiTextBox(fig3_range2_min_rect, fig3_range2_min, 32, active)) { + fig3_range2_min_val = atof(fig3_range2_min); + } + Rectangle fig3_range2_max_rect = {3.5*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; + active = CheckCollisionPointRec(focus, fig3_range2_max_rect); + if (GuiTextBox(fig3_range2_max_rect, fig3_range2_max, 32, active)) { + fig3_range2_max_val = atof(fig3_range2_max); + } + + // Figure 4 args4.x_label = "Value"; @@ -818,7 +923,12 @@ int main(void) { 
BeginTextureMode(fig4); ClearBackground(PUFF_BACKGROUND); for (int i=0; i Date: Sat, 11 Oct 2025 21:19:44 +0000 Subject: [PATCH 055/188] prototype --- pufferlib/ocean/constellation/constellation.c | 144 ++++++++++++------ pufferlib/ocean/constellation/style_cyber.rgs | Bin 0 -> 3871 bytes 2 files changed, 99 insertions(+), 45 deletions(-) create mode 100644 pufferlib/ocean/constellation/style_cyber.rgs diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index a4b6223eb..69cb5c414 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -17,8 +17,8 @@ const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; Color COLORS[] = { - DARKGRAY, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, - GRAY, RED, GOLD, LIME, BLUE, VIOLET, BROWN, LIGHTGRAY, PINK, YELLOW, + BLUE, DARKGRAY, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, + GRAY, RED, GOLD, LIME, VIOLET, BROWN, LIGHTGRAY, PINK, YELLOW, GREEN, SKYBLUE, PURPLE, BEIGE }; @@ -27,7 +27,7 @@ const float EMPTY = -4242.0f; #define SEP 4 #define SETTINGS_HEIGHT 20 #define TOGGLE_WIDTH 60 -#define DROPDOWN_WIDTH 200 +#define DROPDOWN_WIDTH 150 typedef struct { char *key; @@ -470,7 +470,7 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ } } -void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color) { +void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color, Hyper* color_param, bool log_color) { assert(x->n == y->n); int width = args.width; @@ -482,6 +482,25 @@ void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color float dx = x_max - x_min; float dy = y_max - y_min; + + // Thresholded color + float threshold[8]; + if (color_param != NULL) { + float color_min = color_param->ary[0]; + float color_max = color_param->ary[1]; + for (int i=0; 
in; i++) { + color_min = fmin(color_min, color_param->ary[i]); + color_max = fmax(color_max, color_param->ary[i]); + } + for (int i=0; i<8; i++) { + if (log_color) { + threshold[i] = pow(10, log10(color_min) + (log10(color_max) - log10(color_min))*i/7.0f); + } else { + threshold[i] = color_min + (color_max - color_min)*i/7.0f; + } + } + } + for (int i=0; in; i++) { float xi = log_x ? log10(x->ary[i]) : x->ary[i]; float yi = log_y ? log10(y->ary[i]) : y->ary[i]; @@ -490,7 +509,20 @@ void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color if (xi < args.x_margin) { int s = 2; } - DrawCircle(xi, yi, args.line_width, color); + if (color_param == NULL) { + DrawCircle(xi, yi, args.line_width, color); + continue; + } + + // Thresholded color + int c = 0; + for (int j=0; j<8; j++) { + float fi = color_param->ary[i]; + if (fi > threshold[j]) { + c = j; + } + } + DrawCircle(xi, yi, args.line_width, COLORS[c]); } } @@ -671,8 +703,28 @@ int main(void) { strcat(options, hyper_key[i]); } + // Options with extra "env_name;" + char* extra = "env_name;"; + char *env_hyper_options = malloc(options_len + strlen(extra)); + strcpy(env_hyper_options, extra); + strcat(env_hyper_options, options); + + // Env names as semi-colon-separated string + size_t env_options_len = 4; + for (int i = 0; i < data.n; i++) { + env_options_len += strlen(data.envs[i].key) + 1; + } + char *env_options = malloc(env_options_len); + strcpy(env_options, "all;"); + env_options[4] = '\0'; + for (int i = 0; i < data.n; i++) { + if (i > 0) strcat(env_options, ";"); + strcat(env_options, data.envs[i].key); + } + // Initialize Raylib InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); + GuiLoadStyle("pufferlib/ocean/constellation/style_cyber.rgs"); ClearBackground(PUFF_BACKGROUND); SetTargetFPS(60); @@ -699,12 +751,17 @@ int main(void) { PlotArgs args2 = DEFAULT_PLOT_ARGS; RenderTexture2D fig2 = LoadRenderTexture(args2.width, 
args2.height); + int fig2_env_idx = 1; + bool fig2_env_active = false; bool fig2_x_active = false; int fig2_x_idx = 1; bool fig2_x_log = true; bool fig2_y_active = false; int fig2_y_idx = 2; bool fig2_y_log = false; + int fig2_color_idx = 1; + bool fig2_color_active = false; + bool fig2_log_color = true; PlotArgs args3 = DEFAULT_PLOT_ARGS; RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); @@ -829,12 +886,32 @@ int main(void) { BeginTextureMode(fig2); ClearBackground(PUFF_BACKGROUND); - for (int i=0; iTzM1?qd6|X30Km=K$0~o!7A|hK@;;5`af*^(^5ET^^fpGz3 zOH^Fhq98lT2PmKjiHPiygf&1|5<*BIOJAHh^WOUp-tU~MI;Xl$RdscD^{G&_)3!gC zL68RYH^A*HMi8_N0&d5YAV>)U?XV*REr$T7^#%kfL*RV|5`tDhU}B&Qf>uIc|5Y9Y zt%87K4S=B45HQ7eL(ne}!2G@eg4RI5>cEm86$oHYYeSGawC=Ak|3q64fv>gKA!q{x z47M+=XhIv8c>Ol$FFV>_wm%uLfuPtWx`z6jlp&>Gmefn0ms0&pO!fDN4wWOFES^ySIeK!@l~m0g zsB9-=Ne|lDt*3b}qZ%1Zdu)vrEB*?6p8H86#EwtQaA_5V>92~>0g|L%UdkPsQ98r! zO|nkY?cC49l`V4#oqC`&_C85YQnkaFHiEW;^hsvoH}!EjHNdL(orX~>ob_~vclfa= zGy{X1G|Qx-q}jxn9_wRix6}4~pgg`F6x`9n&e8TYOGAdwGf;#p3YNv7h`Z=+l9`g^ zi}KjBWsuPCR)a)mezsti#VEegU8zXmHz@OYor5#?;b~AvQ95R$DUV>?-$Q%%TBeWF zT^5alo5DtiVbr76%I=K1o_*52 zH+KP41^F>M&M;~1Z9HI-Y$zQBQ6zgZ>`9EFkFJTa9O{XdlR6UK?EuU~F0c{<;VuDX zT>p%FEQ!9aFDIJ*=bNRS8tqDqJ(N^3*e&npvVd5I_*wh^!8LM6P9-ONN4~$GOFyHa zM>ocYwysTbcxc|45yQL9&C7jWl}stEMW%NUGsG73&-yK5tDj#LcsPeF;C1r_@S*NP;!ef8cY z-BLYQ&HN4nqC#fv>adV0p`?7Y^kHDa@Y~~ro#Za>1AzNt6r}OM-ijz)TKo zGnUF^sFH)NC47;9ebv5lu9T_`DjVc*P(f?<&fGq-1fL&o9VSO<4~SMvY-C2wz?D`7 zyg#?7_Qu+~lS$0Z`TiV0jglWAvUnb&Q6qwz_^@s{kx6(&XFCO53Yp<9J-hz{g11jN zywNGZ4bgiBC9GK-&E35I#hdxjyw0G)xcwfl!iM`jFqQPt1F7M)P4#+iSYh^^Oci5i z<&w*{0&Zho(3H@;&b@iFZ>zD??p`cBHQt`7sJFQtDC*JIN4n8&wERpzXbb`&CxK(-AF0QvjgNWd?l`iN3lwSI zsTXQ|Whte;yBK`d0?B7&`u^B%HP#DNCS%BT6}p{(d^FuPk}hdU!_8&C=ldW0#*#2! 
z2tk`oy>jkiQk<_{x zdH9Q77DeM89Eg=6*m;W-JhW3uvLpi=|binSi&&F;OzWbvd%jWEGfECaz8_&*4F zM6QB5jZ|R%Zg78}2&adq6*?ZZ&{h>LH*hI=?NdA!lzO{kAtOj^4^C}%x#=^e%EtFk zwB*(8z0{w|wde@!#-`Ry80cUQWDN8w(}uhb_Z5geNtcE?_ZVZ#)9Z{C_nB{EDFYgd zZfClxUu;GPxY9xajZsDO0fNrZ)}6hKhDC$2@zl@VN$3pVZf1L2Y2Orb;^I z(L*q-cF`v3JH`cB?4#Hmjg?VKSwZx7fePjo1-sX^T_*o+pSiPMW9(-?Q5G_}4*#K> zM)UjO?NljTEHr4OM}H)l`W(&(?7A`~JxE9ob$4z9O^T+FQ#GV$o|&8BV?ban)1|H< zos^+?y<^=-)A>e{p>;BE5YF~q{(J|uza(Zvf|Sxw%+G*=00g?t{Xr2?O8R5!Xj_ha3<0^ z1(H%1#`8!!iG71E?@e#?c^ZXPeRtAtq++(X1K*3;xxA;Yq>Su$8O{Pt$SgnIXdd17 zOX|7b9X=A_4cey{tukS<_9 zFrrO_&mPb{N=O2W590;;7n$7{!>TxO%>|OTjCppqB|H-v&l}bisn5J)_n2t+OHU+Q zNW{dz{hcWUJjn%La~`()`H}yFS_03?@Cj@QR0Sriq^dKoAIP_aaP-eZ9q5bblJ#83-N-D+Q

BYl|p z(EP#_v3XLee7ZFJCE4h`r$$%)GdiO)E!T=Q?ACzFAC(a%L>Yl9f)oh|rg_VRF+zZJ zHd0h{bS~hYN`q*)jkcV-7H`8GLaujxsyjB=JE`_h#{XaPNFoXZtWpQH<%s|+n_ViR zSK7(rXB{=09Mm13?mwHgy6%bHrr?sKmg0f(;p@YxBU}~wQk!!+Hi9F#jmSk67$W33|@{95y6k0Ad;YTA7$-{vwyLlhUgD#Da87*-RS-QWkXwLD@dHX-5V_GhiNqWXWi z#<}8}H%!gHw zfwZlpW1@Je1ui_ZWZ?tu^^6rC&B51_o_fpb#-&fKN@78<(1VgNXtgW->jUE%22z6 zy?04=c_6#xw0n;+RjDh&gK5fHxs=DDNFF=y3Ij&W?WonSDV!dt!pQKwi!?C?aTTJ)x_qgNH>;8ZoYmXKnwm z6B$2P&^i5syZ)5CtFeBz@7HWbG&;LSD1C+<-&(TTvS>1r#aaU>U1s_5k#X9jNH}OV z!>XyXsuXoA!$d5aY%K!z~bnP)+uRtbEw&?$1#SOqSzGwnALMxPuRC4YB$>>v(HBD ze8yLnVUNfnziJ=BY9`xaPjwl|8YTu-bqn?OzENm<)(6ltyF<5xsD&((90+kYlk3TI zuf&$Mc}NRuEv{!>^VldsjAg2+PqWjkEr;(&&~E&?T(!2O7q<8>cr!RG;}TvRV10f#*}7biZI0Mivu@q3t`A!{N&H8 zyB5i{vHAl}`~Zn6>s3ht#Rev$WlBVi1+U*^Na70m-6(Gx+IIC66JaT;=z?r*7-eGd zkR$uGI*?vbk!xxB6KWUA+#SSfYLc$%II+-hzB)b%ogwv%K?o6#fsx$O7G+t#fY_l> zPOgn4KT|OIK=@<$mFV8bNbEUzdW1DKgmu~&rm_xQXG9>Tw7J{T*|_gBNfIGR1&(8X zKzxK1Xg{XVQlu~7igo1thJsfM#9MG3yES$Kv8%{W@e8;?%37E}jitb5Y?K7Gb!}7a zyVnlcpY345YqZj+e*K9MWT(SX%Z*HnwWs2ZIrdBQ!0j+TF|>i55*dM-Jpe<7HxoJM zyKuT Date: Sun, 12 Oct 2025 02:08:49 +0000 Subject: [PATCH 056/188] pretty --- pufferlib/ocean/constellation/constellation.c | 67 +++++++++++------- pufferlib/ocean/constellation/puffer.rgs | Bin 0 -> 128 bytes 2 files changed, 43 insertions(+), 24 deletions(-) create mode 100644 pufferlib/ocean/constellation/puffer.rgs diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 69cb5c414..c963ad11e 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -5,6 +5,7 @@ #include #include #include "raylib.h" +#include "rlgl.h" #define RAYGUI_IMPLEMENTATION #include "raygui.h" @@ -98,13 +99,13 @@ PlotArgs DEFAULT_PLOT_ARGS = { .z_max = EMPTY, .width = 960, .height = 540 - SETTINGS_HEIGHT, - .title_font_size = 24, - .axis_font_size = 24, - .axis_tick_font_size = 12, + .title_font_size = 32, + .axis_font_size = 32, + 
.axis_tick_font_size = 16, .legend_font_size = 12, .line_width = 2, .tick_length = 8, - .x_margin = 70, + .x_margin = 100, .y_margin = 70, .font_color = PUFF_WHITE, .background_color = PUFF_BACKGROUND, @@ -194,7 +195,7 @@ void draw_axes(PlotArgs args) { args.axis_color ); - Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_font_size, 0); + Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_tick_font_size, 0); DrawTextEx( args.font_small, label, @@ -220,7 +221,7 @@ void draw_axes(PlotArgs args) { y_pos, args.axis_color ); - Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_font_size, 0); + Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_tick_font_size, 0); DrawTextEx( args.font_small, label, @@ -294,7 +295,7 @@ void draw_box_axes(char* hypers[], int hyper_count, PlotArgs args) { args.axis_color ); - Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_font_size, 0); + Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_tick_font_size, 0); DrawTextEx( args.font_small, label, @@ -319,7 +320,7 @@ void draw_box_axes(char* hypers[], int hyper_count, PlotArgs args) { y_pos, args.axis_color ); - Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_font_size, 0); + Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_tick_font_size, 0); DrawTextEx( args.font_small, label, @@ -336,20 +337,32 @@ void draw_box_axes(char* hypers[], int hyper_count, PlotArgs args) { } -void draw_axes3(PlotArgs args) { +void draw_axes3(PlotArgs args, bool log_x, bool log_y, bool log_z) { + float extent = 1.0f; + float dx = log_x ? log10(args.x_max) - log10(args.x_min) : args.x_max - args.x_min; + float dy = log_y ? log10(args.y_max) - log10(args.y_min) : args.y_max - args.y_min; + float dz = log_z ? log10(args.z_max) - log10(args.z_min) : args.z_max - args.z_min; + extent = fmax(extent, dx); + extent = fmax(extent, dy); + extent = fmax(extent, dz); + + float x = log_x ? 
log10(args.x_min) : args.x_min; + float y = log_y ? log10(args.y_min) : args.y_min; + float z = log_z ? log10(args.z_min) : args.z_min; + DrawLine3D( - (Vector3){-10.0f, 0, 0}, - (Vector3){10.0f, 0, 0}, + (Vector3){x, y, z}, + (Vector3){x + extent, y, z}, RED ); DrawLine3D( - (Vector3){0, -10.0f, 0}, - (Vector3){0, 10.0f, 0}, + (Vector3){x, y, z}, + (Vector3){x, y + extent, z}, GREEN ); DrawLine3D( - (Vector3){0, 0, -10.0f}, - (Vector3){0, 0, 10.0f}, + (Vector3){x, y, z}, + (Vector3){x, y, z + extent}, BLUE ); } @@ -438,7 +451,7 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ dx = x_max - x_min; float dy = (height - 2*args.y_margin)/((float)hyper_count); - //Color faded = Fade(color, 0.25f); + Color faded = Fade(color, 0.15f); for (int i=0; iary[j]) : y->ary[j]; float zj = (log_z) ? log10(z->ary[j]) : z->ary[j]; //DrawSphere((Vector3){xj, yj, zj}, 0.1f, color); - DrawCube((Vector3){xj, yj, zj}, 0.1f, 0.1f, 0.1f, color); + DrawCube((Vector3){xj, yj, zj}, 0.02f, 0.02f, 0.02f, color); + //DrawPoint3D((Vector3){xj, yj, zj}, color); } } @@ -724,15 +738,15 @@ int main(void) { // Initialize Raylib InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); - GuiLoadStyle("pufferlib/ocean/constellation/style_cyber.rgs"); + GuiLoadStyle("pufferlib/ocean/constellation/puffer.rgs"); ClearBackground(PUFF_BACKGROUND); SetTargetFPS(60); - DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/Montserrat-Regular.ttf", 24, NULL, 255); - DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/Montserrat-Regular.ttf", 12, NULL, 255); + DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 32, NULL, 255); + DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 16, NULL, 255); Camera3D camera = (Camera3D){ 0 }; - camera.position = (Vector3){ 5.0f, 5.0f, 5.0f }; + camera.position = (Vector3){ 5.0f, 2.0f, 5.0f }; camera.target = 
(Vector3){ 0.0f, 0.0f, 0.0f }; camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; camera.fovy = 45.0f; @@ -751,6 +765,7 @@ int main(void) { PlotArgs args2 = DEFAULT_PLOT_ARGS; RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); + //SetTextureFilter(fig2.texture, TEXTURE_FILTER_POINT); int fig2_env_idx = 1; bool fig2_env_active = false; bool fig2_x_active = false; @@ -843,7 +858,7 @@ int main(void) { plot3(x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, COLORS[i]); } - draw_axes3(args1); + draw_axes3(args1, fig1_x_log, fig1_y_log, fig1_z_log); EndMode3D(); EndTextureMode(); DrawTextureRec( @@ -1005,18 +1020,22 @@ int main(void) { // Figure 4 args4.x_label = "Value"; args4.y_label = "Hyperparameter"; + args4.x_margin = 200; args4.x_min = 1e-8; args4.x_max = 1e8; BeginTextureMode(fig4); ClearBackground(PUFF_BACKGROUND); + rlSetBlendFactorsSeparate(0x0302, 0x0303, 1, 0x0303, 0x8006, 0x8006); + BeginBlendMode(BLEND_CUSTOM_SEPARATE); for (int i=0; iFfn}me-$Xf%m5K*VfepgrV#@JE5m;V z5MpEa|N5o@0|Pt5|LMs Date: Mon, 13 Oct 2025 21:54:17 +0000 Subject: [PATCH 057/188] Progress! 
--- pufferlib/ocean/constellation/constellation.c | 407 +++++++++--------- 1 file changed, 205 insertions(+), 202 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index c963ad11e..41e006d70 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -18,8 +18,8 @@ const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; Color COLORS[] = { - BLUE, DARKGRAY, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, - GRAY, RED, GOLD, LIME, VIOLET, BROWN, LIGHTGRAY, PINK, YELLOW, + BLUE, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, + GRAY, RED, GOLD, LIME, VIOLET, LIGHTGRAY, PINK, YELLOW, GREEN, SKYBLUE, PURPLE, BEIGE }; @@ -28,7 +28,8 @@ const float EMPTY = -4242.0f; #define SEP 4 #define SETTINGS_HEIGHT 20 #define TOGGLE_WIDTH 60 -#define DROPDOWN_WIDTH 150 +#define DROPDOWN_WIDTH 120 +#define BUCKETS 8 typedef struct { char *key; @@ -63,6 +64,14 @@ Hyper* get_hyper(Dataset *data, char *env, char* hyper) { return NULL; } +// TODO: Slow as fuck +Color rgb(float h) { + float r = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f, 6.f) - 3.f) - 1.f)); + float g = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f + 4.f, 6.f) - 3.f) - 1.f)); + float b = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f + 2.f, 6.f) - 3.f) - 1.f)); + return (Color){r * 255.f + .5f, g * 255.f + .5f, b * 255.f + .5f, 255}; +} + typedef struct PlotArgs { float x_min; float x_max; @@ -367,9 +376,9 @@ void draw_axes3(PlotArgs args, bool log_x, bool log_y, bool log_z) { ); } -float hyper_min(Dataset *data, char* key) { +float hyper_min(Dataset *data, char* key, int start, int end) { float mmin = FLT_MAX; - for (int env=0; envn; env++) { + for (int env=start; envenvs[env].n; i++) { Hyper* hyper = &data->envs[env].hypers[i]; if (strcmp(hyper->key, key) != 0) { @@ -386,9 +395,9 @@ float hyper_min(Dataset *data, char* key) { return 
mmin; } -float hyper_max(Dataset *data, char* key) { +float hyper_max(Dataset *data, char* key, int start, int end) { float mmax = -FLT_MAX; - for (int i=0; in; i++) { + for (int i=start; ienvs[i].n; j++) { Hyper* hyper = &data->envs[i].hypers[j]; if (strcmp(hyper->key, key) != 0) { @@ -432,8 +441,7 @@ float ary_max(float* ary, int num) { */ -void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_count, PlotArgs args, Color color, - Hyper* filter1, float f1min, float f1max, Hyper* filter2, float f2min, float f2max) { +void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_count, PlotArgs args, Color color, float* filter) { int width = args.width; int height = args.height; @@ -459,15 +467,9 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ float mmin = ary[0]; float mmax = ary[0]; for (int j=0; jn; j++) { - float f1 = filter1->ary[j]; - if (f1 < f1min || f1 > f1max) { + if (filter != NULL && !filter[j]) { continue; } - float f2 = filter2->ary[j]; - if (f2 < f2min || f2 > f2max) { - continue; - } - mmin = fmin(mmin, ary[j]); mmax = fmax(mmax, ary[j]); } @@ -483,7 +485,7 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ } } -void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color, Hyper* color_param, bool log_color) { +void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n); int width = args.width; @@ -496,25 +498,10 @@ void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color float dx = x_max - x_min; float dy = y_max - y_min; - // Thresholded color - float threshold[8]; - if (color_param != NULL) { - float color_min = color_param->ary[0]; - float color_max = color_param->ary[1]; - for (int i=0; in; i++) { - color_min = fmin(color_min, color_param->ary[i]); - color_max = fmax(color_max, color_param->ary[i]); - } - for (int i=0; i<8; i++) { - 
if (log_color) { - threshold[i] = pow(10, log10(color_min) + (log10(color_max) - log10(color_min))*i/7.0f); - } else { - threshold[i] = color_min + (color_max - color_min)*i/7.0f; - } - } - } - for (int i=0; in; i++) { + if (filter != NULL && !filter[i]) { + continue; + } float xi = log_x ? log10(x->ary[i]) : x->ary[i]; float yi = log_y ? log10(y->ary[i]) : y->ary[i]; xi = args.x_margin + (xi - x_min) / dx * (width - 2*args.x_margin); @@ -522,20 +509,8 @@ void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color if (xi < args.x_margin) { int s = 2; } - if (color_param == NULL) { - DrawCircle(xi, yi, args.line_width, color); - continue; - } - - // Thresholded color - int c = 0; - for (int j=0; j<8; j++) { - float fi = color_param->ary[i]; - if (fi > threshold[j]) { - c = j; - } - } - DrawCircle(xi, yi, args.line_width, COLORS[c]); + Color c = rgb(cmap[i]); + DrawCircle(xi, yi, args.line_width, c); } } @@ -574,8 +549,7 @@ void plot_filtered(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Co } } - -void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, Color color) { +void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n && x->n == z->n); int width = args.width; int height = args.height; @@ -600,14 +574,17 @@ void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, Plo dy = y_max - y_min; dz = z_max - z_min; + // Plot lines - for (int j = 0; j < x->n; j++) { - float xj = (log_x) ? log10(x->ary[j]) : x->ary[j]; - float yj = (log_y) ? log10(y->ary[j]) : y->ary[j]; - float zj = (log_z) ? log10(z->ary[j]) : z->ary[j]; - //DrawSphere((Vector3){xj, yj, zj}, 0.1f, color); - DrawCube((Vector3){xj, yj, zj}, 0.02f, 0.02f, 0.02f, color); - //DrawPoint3D((Vector3){xj, yj, zj}, color); + for (int i = 0; i < x->n; i++) { + if (filter != NULL && !filter[i]) { + continue; + } + float xi = (log_x) ? 
log10(x->ary[i]) : x->ary[i]; + float yi = (log_y) ? log10(y->ary[i]) : y->ary[i]; + float zi = (log_z) ? log10(z->ary[i]) : z->ary[i]; + Color c = rgb(cmap[i]); + DrawCube((Vector3){xi, yi, zi}, 0.02f, 0.02f, 0.02f, c); } } @@ -624,6 +601,67 @@ int cleanup(Hyper *map, int map_count, cJSON *root, char *json_str) { return 1; } +void GuiDropdownCheckbox(int x, int y, char* options, int *selection, bool *active, char *text, bool *checked) { + Rectangle rect = {x, y, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(rect, options, selection, *active)) { + *active = !*active; + } + Rectangle check_rect = {x + rect.width , y, SETTINGS_HEIGHT, rect.height}; + GuiCheckBox(check_rect, text, checked); +} + +void GuiDropdownFilter(int x, int y, char* options, int *selection, bool *dropdown_active, + Vector2 focus, char *text1, float *text1_val, char *text2, float *text2_val) { + Rectangle rect = {x, y, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(rect, options, selection, *dropdown_active)) { + *dropdown_active = !*dropdown_active; + } + Rectangle text1_rect = {x + rect.width, y, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; + bool text1_active = CheckCollisionPointRec(focus, text1_rect); + if (GuiTextBox(text1_rect, text1, 32, text1_active)) { + *text1_val = atof(text1); + } + Rectangle text2_rect = {x + 1.5*DROPDOWN_WIDTH, y, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; + bool text2_active = CheckCollisionPointRec(focus, text2_rect); + if (GuiTextBox(text2_rect, text2, 32, text2_active)) { + *text2_val = atof(text2); + } +} + + + +void apply_filter(float* filter, Hyper* param, float min, float max) { + for (int i=0; in; i++) { + float val = param->ary[i]; + if (val < min || val > max) { + filter[i] = 0.0f; + } + } +} + +float signed_log10(float x) { + if (fabs(x) < 1e-8) { + return -8.0f; + } + if (x > 0) { + return log10(x); + } + return -log10(-x); +} + +void calc_cmap(float* cmap, Hyper* param, float c_min, float c_max, bool log) { + if (log) { + c_min = signed_log10(c_min); 
+ c_max = signed_log10(c_max); + } + for (int i=0; in; i++) { + float val = param->ary[i]; + if (log) { + val = signed_log10(val); + } + cmap[i] = (val - c_min)/(c_max - c_min); + } +} int main(void) { FILE *file = fopen("pufferlib/ocean/constellation/all_cache.json", "r"); @@ -662,6 +700,7 @@ int main(void) { Env *envs = calloc(data.n, sizeof(Env)); data.envs = envs; json_env = root->child; + int max_data_points = 0; for (int i=0; ichild; @@ -675,6 +714,9 @@ int main(void) { } else { assert(hyper_points == nxt_hyper_points); } + if (hyper_points > max_data_points) { + max_data_points = hyper_points; + } json_hyper = json_hyper->next; } envs[i].hypers = calloc(envs[i].n, sizeof(Hyper)); @@ -753,6 +795,8 @@ int main(void) { camera.projection = CAMERA_PERSPECTIVE; PlotArgs args1 = DEFAULT_PLOT_ARGS; RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); + int fig1_env_idx = 0; + bool fig1_env_active = false; bool fig1_x_active = false; int fig1_x_idx = 0; bool fig1_x_log = true; @@ -762,6 +806,9 @@ int main(void) { bool fig1_z_active = false; int fig1_z_idx = 1; bool fig1_z_log = true; + int fig1_color_idx = 0; + bool fig1_color_active = false; + bool fig1_log_color = true; PlotArgs args2 = DEFAULT_PLOT_ARGS; RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); @@ -819,6 +866,9 @@ int main(void) { char* y_label; char* z_label; + float *filter = calloc(max_data_points, sizeof(float)); + float *cmap = calloc(max_data_points, sizeof(float)); + Vector2 focus = {0, 0}; while (!WindowShouldClose()) { @@ -835,12 +885,12 @@ int main(void) { args1.x_label = x_label; args1.y_label = y_label; args1.z_label = z_label; - args1.x_min = hyper_min(&data, hyper_key[fig1_x_idx]); - args1.x_max = hyper_max(&data, hyper_key[fig1_x_idx]); - args1.y_min = hyper_min(&data, hyper_key[fig1_y_idx]); - args1.y_max = hyper_max(&data, hyper_key[fig1_y_idx]); - args1.z_min = hyper_min(&data, hyper_key[fig1_z_idx]); - args1.z_max = hyper_max(&data, 
hyper_key[fig1_z_idx]); + args1.x_min = hyper_min(&data, hyper_key[fig1_x_idx], 0, data.n); + args1.x_max = hyper_max(&data, hyper_key[fig1_x_idx], 0, data.n); + args1.y_min = hyper_min(&data, hyper_key[fig1_y_idx], 0, data.n); + args1.y_max = hyper_max(&data, hyper_key[fig1_y_idx], 0, data.n); + args1.z_min = hyper_min(&data, hyper_key[fig1_z_idx], 0, data.n); + args1.z_max = hyper_max(&data, hyper_key[fig1_z_idx], 0, data.n); float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; float y_mid = fig1_y_log ? (log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; float z_mid = fig1_z_log ? (log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; @@ -850,14 +900,35 @@ int main(void) { BeginMode3D(camera); UpdateCamera(&camera, CAMERA_ORBITAL); - for (int i=0; in; j++) { + cmap[j] = i/(float)data.n; + } + } + plot3(x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); } - draw_axes3(args1, fig1_x_log, fig1_y_log, fig1_z_log); EndMode3D(); EndTextureMode(); @@ -866,34 +937,25 @@ int main(void) { (Rectangle){0, 0, fig1.texture.width, -fig1.texture.height }, (Vector2){ 0, SETTINGS_HEIGHT }, WHITE ); - Rectangle fig1_x_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig1_x_rect, options, &fig1_x_idx, fig1_x_active)){ - fig1_x_active = !fig1_x_active; + Rectangle fig1_env_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_env_rect, env_options, &fig1_env_idx, fig1_env_active)){ + fig1_env_active = !fig1_env_active; } - Rectangle fig1_x_check_rect = {DROPDOWN_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; - GuiCheckBox(fig1_x_check_rect, "Log X", &fig1_x_log); - Rectangle fig1_y_rect = {DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig1_y_rect, options, &fig1_y_idx, fig1_y_active)){ - fig1_y_active = !fig1_y_active; - } - Rectangle fig1_y_check_rect = 
{2*DROPDOWN_WIDTH+TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; - GuiCheckBox(fig1_y_check_rect, "Log Y", &fig1_y_log); - Rectangle fig1_z_rect = {2*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig1_z_rect, options, &fig1_z_idx, fig1_z_active)){ - fig1_z_active = !fig1_z_active; - } - Rectangle fig1_z_check_rect = {3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; - GuiCheckBox(fig1_z_check_rect, "Log Z", &fig1_z_log); + GuiDropdownCheckbox(DROPDOWN_WIDTH, 0, options, &fig1_x_idx, &fig1_x_active, "Log X", &fig1_x_log); + GuiDropdownCheckbox(2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig1_y_idx, &fig1_y_active, "Log Y", &fig1_y_log); + GuiDropdownCheckbox(3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, options, &fig1_z_idx, &fig1_z_active, "Log Z", &fig1_z_log); + GuiDropdownCheckbox(4*DROPDOWN_WIDTH + 3*TOGGLE_WIDTH, 0, env_hyper_options, &fig1_color_idx, &fig1_color_active, "Log Color", &fig1_log_color); + // Figure 2 x_label = hyper_key[fig2_x_idx]; y_label = hyper_key[fig2_y_idx]; args2.x_label = x_label; args2.y_label = y_label; - args2.x_min = hyper_min(&data, hyper_key[fig2_x_idx]); - args2.x_max = hyper_max(&data, hyper_key[fig2_x_idx]); - args2.y_min = hyper_min(&data, hyper_key[fig2_y_idx]); - args2.y_max = hyper_max(&data, hyper_key[fig2_y_idx]); + args2.x_min = hyper_min(&data, hyper_key[fig2_x_idx], 0, data.n); + args2.x_max = hyper_max(&data, hyper_key[fig2_x_idx], 0, data.n); + args2.y_min = hyper_min(&data, hyper_key[fig2_y_idx], 0, data.n); + args2.y_max = hyper_max(&data, hyper_key[fig2_y_idx], 0, data.n); args2.x_min = (fig2_x_log) ? log10(args2.x_min) : args2.x_min; args2.x_max = (fig2_x_log) ? log10(args2.x_max) : args2.x_max; args2.y_min = (fig2_y_log) ? 
log10(args2.y_min) : args2.y_min; @@ -901,32 +963,34 @@ int main(void) { BeginTextureMode(fig2); ClearBackground(PUFF_BACKGROUND); - if (fig2_env_idx == 0) { - for (int i=0; in; j++) { + cmap[j] = i/(float)data.n; + } } - - plot(x, y, fig2_x_log, fig2_y_log, args2, COLORS[0], color_param, fig2_log_color); + plot(x, y, fig2_x_log, fig2_y_log, args2, cmap, NULL); } - draw_axes(args2); EndTextureMode(); DrawTextureRec( @@ -938,32 +1002,18 @@ int main(void) { if (GuiDropdownBox(fig2_env_rect, env_options, &fig2_env_idx, fig2_env_active)){ fig2_env_active = !fig2_env_active; } - Rectangle fig2_x_rect = {fig1.texture.width + DROPDOWN_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig2_x_rect, options, &fig2_x_idx, fig2_x_active)){ - fig2_x_active = !fig2_x_active; - } - Rectangle fig2_x_check_rect = {fig1.texture.width + 2*DROPDOWN_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; - GuiCheckBox(fig2_x_check_rect, "Log X", &fig2_x_log); - Rectangle fig2_y_rect = {fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig2_y_rect, options, &fig2_y_idx, fig2_y_active)){ - fig2_y_active = !fig2_y_active; - } - Rectangle fig2_y_check_rect = {fig1.texture.width + 3*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; - GuiCheckBox(fig2_y_check_rect, "Log Y", &fig2_y_log); - Rectangle fig2_color_rect = {fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig2_color_rect, env_hyper_options, &fig2_color_idx, fig2_color_active)){ - fig2_color_active = !fig2_color_active; - } - Rectangle fig2_color_check_rect = {fig1.texture.width + 4*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, SETTINGS_HEIGHT, SETTINGS_HEIGHT}; - GuiCheckBox(fig2_color_check_rect, "Log Color", &fig2_log_color); + GuiDropdownCheckbox(fig1.texture.width + DROPDOWN_WIDTH, 0, options, &fig2_x_idx, &fig2_x_active, "Log X", &fig2_x_log); + GuiDropdownCheckbox(fig1.texture.width + 
2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig2_y_idx, &fig2_y_active, "Log Y", &fig2_y_log); + GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &fig2_log_color); + // Figure 3 args3.x_label = "tsne1"; args3.y_label = "tsne2"; - args3.x_min = hyper_min(&data, "tsne1"); - args3.x_max = hyper_max(&data, "tsne1"); - args3.y_min = hyper_min(&data, "tsne2"); - args3.y_max = hyper_max(&data, "tsne2"); + args3.x_min = hyper_min(&data, "tsne1", 0, data.n); + args3.x_max = hyper_max(&data, "tsne1", 0, data.n); + args3.y_min = hyper_min(&data, "tsne2", 0, data.n); + args3.y_max = hyper_max(&data, "tsne2", 0, data.n); BeginTextureMode(fig3); ClearBackground(PUFF_BACKGROUND); @@ -971,13 +1021,17 @@ int main(void) { char* env = data.envs[i].key; x = get_hyper(&data, env, "tsne1"); y = get_hyper(&data, env, "tsne2"); - //plot(x, y, false, false, args3, COLORS[i]); - Hyper* filter1 = get_hyper(&data, env, hyper_key[fig3_range1_idx]); - Hyper* filter2 = get_hyper(&data, env, hyper_key[fig3_range2_idx]); - plot_filtered(x, y, false, false, args3, COLORS[i], - filter1, fig3_range1_min_val, fig3_range1_max_val, - filter2, fig3_range2_min_val, fig3_range2_max_val - ); + for (int j=0; jn; j++) { + cmap[j] = i/(float)data.n; + } + for (int j=0; jn; j++) { + filter[j] = 1.0f; + } + Hyper* filter_param_1 = get_hyper(&data, env, hyper_key[fig3_range1_idx]); + apply_filter(filter, filter_param_1, fig3_range1_min_val, fig3_range1_max_val); + Hyper* filter_param_2 = get_hyper(&data, env, hyper_key[fig3_range2_idx]); + apply_filter(filter, filter_param_2, fig3_range2_min_val, fig3_range2_max_val); + plot(x, y, false, false, args3, cmap, filter); } draw_axes(args3); EndTextureMode(); @@ -986,36 +1040,10 @@ int main(void) { (Rectangle){ 0, 0, fig3.texture.width, -fig3.texture.height }, (Vector2){ 0, SETTINGS_HEIGHT + fig1.texture.height }, WHITE ); - Rectangle fig3_range1_rect = {0, 
fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig3_range1_rect, options, &fig3_range1_idx, fig3_range1_active)){ - fig3_range1_active = !fig3_range1_active; - } - Rectangle fig3_range1_min_rect = {DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; - bool active = CheckCollisionPointRec(focus, fig3_range1_min_rect); - if (GuiTextBox(fig3_range1_min_rect, fig3_range1_min, 32, active)) { - fig3_range1_min_val = atof(fig3_range1_min); - } - Rectangle fig3_range1_max_rect = {1.5*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; - active = CheckCollisionPointRec(focus, fig3_range1_max_rect); - if (GuiTextBox(fig3_range1_max_rect, fig3_range1_max, 32, active)) { - fig3_range1_max_val = atof(fig3_range1_max); - } - Rectangle fig3_range2_rect = {2*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig3_range2_rect, options, &fig3_range2_idx, fig3_range2_active)){ - fig3_range2_active = !fig3_range2_active; - } - Rectangle fig3_range2_min_rect = {3*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; - active = CheckCollisionPointRec(focus, fig3_range2_min_rect); - if (GuiTextBox(fig3_range2_min_rect, fig3_range2_min, 32, active)) { - fig3_range2_min_val = atof(fig3_range2_min); - } - Rectangle fig3_range2_max_rect = {3.5*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, DROPDOWN_WIDTH/2, SETTINGS_HEIGHT}; - active = CheckCollisionPointRec(focus, fig3_range2_max_rect); - if (GuiTextBox(fig3_range2_max_rect, fig3_range2_max, 32, active)) { - fig3_range2_max_val = atof(fig3_range2_max); - } - - + GuiDropdownFilter(0, fig1.texture.height, options, &fig3_range1_idx, &fig3_range1_active, focus, + fig3_range1_min, &fig3_range1_min_val, fig3_range1_max, &fig3_range1_max_val); + GuiDropdownFilter(2*DROPDOWN_WIDTH, fig1.texture.height, options, 
&fig3_range2_idx, &fig3_range2_active, focus, + fig3_range2_min, &fig3_range2_min_val, fig3_range2_max, &fig3_range2_max_val); // Figure 4 args4.x_label = "Value"; @@ -1028,12 +1056,11 @@ int main(void) { rlSetBlendFactorsSeparate(0x0302, 0x0303, 1, 0x0303, 0x8006, 0x8006); BeginBlendMode(BLEND_CUSTOM_SEPARATE); for (int i=0; i Date: Mon, 13 Oct 2025 21:54:32 +0000 Subject: [PATCH 058/188] Progress! --- pufferlib/ocean/constellation/constellation.c | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 41e006d70..72abddf7e 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -514,41 +514,6 @@ void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap } } -void plot_filtered(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, Color color, - Hyper* filter1, float f1min, float f1max, Hyper* filter2, float f2min, float f2max) { - assert(x->n == y->n); - - int width = args.width; - int height = args.height; - float x_min = args.x_min; - float x_max = args.x_max; - float y_min = args.y_min; - float y_max = args.y_max; - - float dx = x_max - x_min; - float dy = y_max - y_min; - - for (int i=0; in; i++) { - float f1 = filter1->ary[i]; - if (f1 < f1min || f1 > f1max) { - continue; - } - float f2 = filter2->ary[i]; - if (f2 < f2min || f2 > f2max) { - continue; - } - - float xi = log_x ? log10(x->ary[i]) : x->ary[i]; - float yi = log_y ? 
log10(y->ary[i]) : y->ary[i]; - xi = args.x_margin + (xi - x_min) / dx * (width - 2*args.x_margin); - yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2*args.y_margin); - if (xi < args.x_margin) { - int s = 2; - } - DrawCircle(xi, yi, args.line_width, color); - } -} - void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n && x->n == z->n); int width = args.width; From 9077b0ebf66ec56ac62a08d89e364d22347a3c98 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 14 Oct 2025 19:50:07 +0000 Subject: [PATCH 059/188] shaders! --- .../ocean/constellation/KHR/khrplatform.h | 311 +++ pufferlib/ocean/constellation/constellation.c | 501 ++-- pufferlib/ocean/constellation/glad.h | 2129 +++++++++++++++++ .../ocean/constellation/point_particle.fs | 27 + .../ocean/constellation/point_particle.vs | 31 + pufferlib/ocean/constellation/star_shader.fs | 11 + pufferlib/ocean/constellation/star_shader.vs | 20 + 7 files changed, 2886 insertions(+), 144 deletions(-) create mode 100644 pufferlib/ocean/constellation/KHR/khrplatform.h create mode 100644 pufferlib/ocean/constellation/glad.h create mode 100644 pufferlib/ocean/constellation/point_particle.fs create mode 100644 pufferlib/ocean/constellation/point_particle.vs create mode 100644 pufferlib/ocean/constellation/star_shader.fs create mode 100644 pufferlib/ocean/constellation/star_shader.vs diff --git a/pufferlib/ocean/constellation/KHR/khrplatform.h b/pufferlib/ocean/constellation/KHR/khrplatform.h new file mode 100644 index 000000000..01646449c --- /dev/null +++ b/pufferlib/ocean/constellation/KHR/khrplatform.h @@ -0,0 +1,311 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2018 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * The master copy of khrplatform.h is maintained in the Khronos EGL + * Registry repository at https://github.com/KhronosGroup/EGL-Registry + * The last semantic modification to khrplatform.h was at commit ID: + * 67a3e0864c2d75ea5287b9f3d2eb74a745936692 + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by filing pull requests or issues on + * the EGL Registry repository linked above. 
+ * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. 
+ * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC) +# define KHRONOS_STATIC 1 +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(KHRONOS_STATIC) + /* If the preprocessor constant KHRONOS_STATIC is defined, make the + * header compatible with static linking. */ +# define KHRONOS_APICALL +#elif defined(_WIN32) +# define KHRONOS_APICALL __declspec(dllimport) +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#elif defined(__ANDROID__) +# define KHRONOS_APICALL __attribute__((visibility("default"))) +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. 
+ */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 +/* + * To support platform where unsigned long cannot be used interchangeably with + * inptr_t (e.g. CHERI-extended ISAs), we can use the stdint.h intptr_t. + * Ideally, we could just use (u)intptr_t everywhere, but this could result in + * ABI breakage if khronos_uintptr_t is changed from unsigned long to + * unsigned long long or similar (this results in different C++ name mangling). + * To avoid changes for existing platforms, we restrict usage of intptr_t to + * platforms where the size of a pointer is larger than the size of long. 
+ */ +#if defined(__SIZEOF_LONG__) && defined(__SIZEOF_POINTER__) +#if __SIZEOF_POINTER__ > __SIZEOF_LONG__ +#define KHRONOS_USE_INTPTR_T +#endif +#endif + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; + +/* + * Types that differ between LLP64 and LP64 architectures - in LLP64, + * pointers 
are 64 bits, but 'long' is still 32 bits. Win64 appears + * to be the only LLP64 architecture in current use. + */ +#ifdef KHRONOS_USE_INTPTR_T +typedef intptr_t khronos_intptr_t; +typedef uintptr_t khronos_uintptr_t; +#elif defined(_WIN64) +typedef signed long long int khronos_intptr_t; +typedef unsigned long long int khronos_uintptr_t; +#else +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +#endif + +#if defined(_WIN64) +typedef signed long long int khronos_ssize_t; +typedef unsigned long long int khronos_usize_t; +#else +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; +#endif + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. Therefore + * comparisons should not be made against KHRONOS_TRUE. 
+ */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 72abddf7e..5f1ae743b 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -4,13 +4,36 @@ #include #include #include + +#include "cJSON.h" #include "raylib.h" -#include "rlgl.h" #define RAYGUI_IMPLEMENTATION #include "raygui.h" -#include "cJSON.h" +#if defined(PLATFORM_DESKTOP) || defined(PLATFORM_DESKTOP_SDL) + #if defined(GRAPHICS_API_OPENGL_ES2) + #include "glad_gles2.h" // Required for: OpenGL functionality + #define glGenVertexArrays glGenVertexArraysOES + #define glBindVertexArray glBindVertexArrayOES + #define glDeleteVertexArrays glDeleteVertexArraysOES + #define GLSL_VERSION 100 + #else + #if defined(__APPLE__) + #define GL_SILENCE_DEPRECATION // Silence Opengl API deprecation warnings + #include // OpenGL 3 library for OSX + #include // OpenGL 3 extensions library for OSX + #else + #include "glad.h" // Required for: OpenGL functionality + #endif + #define GLSL_VERSION 330 + #endif +#else // PLATFORM_ANDROID, PLATFORM_WEB + #define GLSL_VERSION 100 +#endif + +#include "rlgl.h" +#include "raymath.h" const Color PUFF_RED = (Color){187, 0, 0, 255}; const Color PUFF_CYAN = (Color){0, 187, 187, 255}; @@ -25,6 +48,23 @@ Color COLORS[] = { const float EMPTY = -4242.0f; +#define MAX_PARTICLES 1000 + +typedef struct Particle { + float x; + float y; + float period; + float r; + float g; + float b; + float a; +} Particle; + +typedef struct VertexBuffer { + float* vertices; + int n; +} VertexBuffer; + #define SEP 4 #define SETTINGS_HEIGHT 20 #define TOGGLE_WIDTH 60 @@ -89,6 +129,7 @@ typedef struct PlotArgs { int tick_length; int x_margin; int y_margin; + int tick_margin; Color font_color; Color background_color; Color 
axis_color; @@ -114,6 +155,7 @@ PlotArgs DEFAULT_PLOT_ARGS = { .legend_font_size = 12, .line_width = 2, .tick_length = 8, + .tick_margin = 8, .x_margin = 100, .y_margin = 70, .font_color = PUFF_WHITE, @@ -124,6 +166,16 @@ PlotArgs DEFAULT_PLOT_ARGS = { .z_label = "Train/Learning Rate", }; +float signed_log10(float x) { + if (fabs(x) < 1e-8) { + return -8.0f; + } + if (x > 0) { + return log10(x); + } + return -log10(-x); +} + const char* format_tick_label(double value) { static char buffer[32]; int precision = 2; @@ -188,14 +240,14 @@ void draw_axes(PlotArgs args) { // Autofit number of ticks Vector2 tick_label_size = MeasureTextEx(args.font, "estimate", args.axis_font_size, 0); - int num_x_ticks = (width - 2*args.x_margin)/tick_label_size.x; - int num_y_ticks = (height - 2*args.y_margin)/tick_label_size.x; + int num_x_ticks = 1 + (width - 2*args.x_margin)/tick_label_size.x; + int num_y_ticks = 1 + (height - 2*args.y_margin)/tick_label_size.y; // X ticks for (int i=0; iary[0]; - for (int i=1; in; i++) { - if (hyper->ary[i] > max) max = hyper->ary[i]; - } - return max; -} - -float ary_min(float* ary, int num) { - float min = ary[0]; - for (int i=1; i max) max = ary[i]; - } - return max; -} - -*/ - -void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_count, PlotArgs args, Color color, float* filter) { +void boxplot(Hyper* hyper, bool log_x, int i, int hyper_count, PlotArgs args, Color color, bool* filter) { int width = args.width; int height = args.height; @@ -461,30 +491,140 @@ void boxplot(Dataset* data, bool log_x, char* env, char* hyper_key[], int hyper_ Color faded = Fade(color, 0.15f); - for (int i=0; iary; - float mmin = ary[0]; - float mmax = ary[0]; - for (int j=0; jn; j++) { - if (filter != NULL && !filter[j]) { - continue; - } - mmin = fmin(mmin, ary[j]); - mmax = fmax(mmax, ary[j]); - } - - if (log_x) { - mmin = mmin <= 0 ? 0 : log10(mmin); - mmax = mmax <= 0 ? 
0 : log10(mmax); + float* ary = hyper->ary; + float mmin = ary[0]; + float mmax = ary[0]; + for (int j=0; jn; j++) { + if (filter != NULL && !filter[j]) { + continue; } + mmin = fmin(mmin, ary[j]); + mmax = fmax(mmax, ary[j]); + } - float left = args.x_margin + (mmin - x_min)/(x_max - x_min)*(width - 2*args.x_margin); - float right = args.x_margin + (mmax - x_min)/(x_max - x_min)*(width - 2*args.x_margin); - DrawRectangle(left, args.y_margin + i*dy, right - left, dy, faded); + if (log_x) { + mmin = mmin <= 0 ? 0 : log10(mmin); + mmax = mmax <= 0 ? 0 : log10(mmax); } + + float left = args.x_margin + (mmin - x_min)/(x_max - x_min)*(width - 2*args.x_margin); + float right = args.x_margin + (mmax - x_min)/(x_max - x_min)*(width - 2*args.x_margin); + + // TODO - rough patch + left = fmax(left, args.x_margin); + right = fmin(right, width - args.x_margin); + DrawRectangle(left, args.y_margin + i*dy, right - left, dy, faded); +} + +// Struct for vertex data (screen-space position and color) +typedef struct { + Vector2 pos; // Screen-space x, y + Color color; // RGBA color +} PlotVertex; + +void plot_gl(Shader shader, VertexBuffer vertices) { + Particle* particles = vertices.vertices; + int n = vertices.n; + + GLuint vao = 0; + GLuint vbo = 0; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + glGenBuffers(1, &vbo); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, n*sizeof(Particle), particles, GL_STATIC_DRAW); + glVertexAttribPointer(shader.locs[SHADER_LOC_VERTEX_POSITION], 3, GL_FLOAT, GL_FALSE, sizeof(Particle), 0); + glEnableVertexAttribArray(shader.locs[SHADER_LOC_VERTEX_POSITION]); + int vertexColorLoc = shader.locs[SHADER_LOC_VERTEX_COLOR]; + glVertexAttribPointer(vertexColorLoc, 4, GL_FLOAT, GL_FALSE, sizeof(Particle), (void*)(3*sizeof(float))); + glEnableVertexAttribArray(vertexColorLoc); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(0); + + + rlDrawRenderBatchActive(); // Draw iternal buffers data (previous draw calls) + + int 
currentTimeLoc = GetShaderLocation(shader, "currentTime"); + // Switch to plain OpenGL + //------------------------------------------------------------------------------ + glUseProgram(shader.id); + + glUniform1f(currentTimeLoc, GetTime()); + + // Get the current modelview and projection matrix so the particle system is displayed and transformed + Matrix modelViewProjection = MatrixMultiply(rlGetMatrixModelview(), rlGetMatrixProjection()); + + glUniformMatrix4fv(shader.locs[SHADER_LOC_MATRIX_MVP], 1, false, MatrixToFloat(modelViewProjection)); + + glBindVertexArray(vao); + glDrawArrays(GL_POINTS, 0, n); + glBindVertexArray(0); + + glUseProgram(0); + //------------------------------------------------------------------------------ + glDeleteBuffers(1, &vbo); + glDeleteVertexArrays(1, &vao); } +void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { + assert(x->n == y->n); + + int width = args.width; + int height = args.height; + + // Compute ranges and apply log scaling if needed + //float x_min = log_x ? log10f(args.x_min) : args.x_min; + //float x_max = log_x ? log10f(args.x_max) : args.x_max; + //float y_min = log_y ? log10f(args.y_min) : args.y_min; + //float y_max = log_y ? 
log10f(args.y_max) : args.y_max; + float x_min = args.x_min; + float x_max = args.x_max; + float y_min = args.y_min; + float y_max = args.y_max; + + float dx = x_max - x_min; + float dy = y_max - y_min; + + // Count valid points after filtering + int valid_count = 0; + for (int i = 0; i < x->n; i++) { + if (filter == NULL || filter[i]) valid_count++; + } + + if (valid_count == 0) return; // Early exit if no points + + // Allocate vertex array + PlotVertex* vertices = (PlotVertex*)malloc(valid_count * sizeof(PlotVertex)); + int idx = 0; + + // Preprocess points: transform and map to screen space + Particle particles[MAX_PARTICLES] = { 0 }; + for (int i = 0; i < x->n; i++) { + if (filter != NULL && !filter[i]) continue; + + // Apply log scaling + float xi = log_x ? log10f(x->ary[i]) : x->ary[i]; + float yi = log_y ? log10f(y->ary[i]) : y->ary[i]; + + // Map to screen coordinates with margins + xi = args.x_margin + (xi - x_min) / dx * (width - 2 * args.x_margin); + yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2 * args.y_margin); + + particles[i].x = xi; + particles[i].y = yi; + particles[i].period = 10.0f; + Color c = rgb(cmap[i]); + particles[i].r = c.r/255.0f; + particles[i].g = c.g/255.0f; + particles[i].b = c.b/255.0f; + particles[i].a = c.a/255.0f; + idx++; + } + + VertexBuffer buffer = {&particles, MAX_PARTICLES}; + plot_gl(shader, buffer); +} +/* void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n); @@ -506,19 +646,15 @@ void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap float yi = log_y ? 
log10(y->ary[i]) : y->ary[i]; xi = args.x_margin + (xi - x_min) / dx * (width - 2*args.x_margin); yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2*args.y_margin); - if (xi < args.x_margin) { - int s = 2; - } Color c = rgb(cmap[i]); DrawCircle(xi, yi, args.line_width, c); } } +*/ -void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { +void plot3(Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, + bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n && x->n == z->n); - int width = args.width; - int height = args.height; - float x_min = args.x_min; float x_max = args.x_max; float y_min = args.y_min; @@ -526,31 +662,69 @@ void plot3(Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, Plo float z_min = args.z_min; float z_max = args.z_max; + if (log_x) { + x_min = signed_log10(x_min); + x_max = signed_log10(x_max); + } + if (log_y) { + y_min = signed_log10(y_min); + y_max = signed_log10(y_max); + } + if (log_z) { + z_min = signed_log10(z_min); + z_max = signed_log10(z_max); + } + float dx = x_max - x_min; float dy = y_max - y_min; float dz = z_max - z_min; - if (dx == 0) dx = 1.0f; - if (dy == 0) dy = 1.0f; - if (dz == 0) dz = 1.0f; - x_min -= 0.1f * dx; x_max += 0.1f * dx; - y_min -= 0.1f * dy; y_max += 0.1f * dy; - z_min -= 0.1f * dz; z_max += 0.1f * dz; - dx = x_max - x_min; - dy = y_max - y_min; - dz = z_max - z_min; - + Particle particles[MAX_PARTICLES] = { 0 }; + int idx = 0; // Plot lines for (int i = 0; i < x->n; i++) { if (filter != NULL && !filter[i]) { continue; } - float xi = (log_x) ? log10(x->ary[i]) : x->ary[i]; - float yi = (log_y) ? log10(y->ary[i]) : y->ary[i]; - float zi = (log_z) ? log10(z->ary[i]) : z->ary[i]; + float xi = (log_x) ? signed_log10(x->ary[i]) : x->ary[i]; + float yi = (log_y) ? signed_log10(y->ary[i]) : y->ary[i]; + float zi = (log_z) ? 
signed_log10(z->ary[i]) : z->ary[i]; + Color c = rgb(cmap[i]); - DrawCube((Vector3){xi, yi, zi}, 0.02f, 0.02f, 0.02f, c); + Vector3 point = (Vector3){(xi - x_min)/dx, (yi - y_min)/dy, (zi - z_min)/dz}; + + /* + DrawCube( + (Vector3){(xi - x_min)/dx, (yi - y_min)/dy, (zi - z_min)/dz}, + 0.02f, 0.02f, 0.02f, c + ); + + DrawSphere( + (Vector3){(xi - x_min)/dx, (yi - y_min)/dy, (zi - z_min)/dz}, + 0.02f, c + ); + */ + + // Project to screen space + Vector2 screen_pos = GetWorldToScreenEx(point, camera, 960, 520); + + particles[i].x = screen_pos.x; + particles[i].y = screen_pos.y; + particles[i].period = 10.0f; + c = rgb(cmap[i]); + particles[i].r = c.r/255.0f; + particles[i].g = c.g/255.0f; + particles[i].b = c.b/255.0f; + particles[i].a = c.a/255.0f; + idx++; + + //DrawBillboard(camera, whiteTexture, point, 0.1f, c); + } + VertexBuffer buffer = {&particles, idx}; + plot_gl(shader, buffer); + + } @@ -595,25 +769,15 @@ void GuiDropdownFilter(int x, int y, char* options, int *selection, bool *dropdo -void apply_filter(float* filter, Hyper* param, float min, float max) { +void apply_filter(bool* filter, Hyper* param, float min, float max) { for (int i=0; in; i++) { float val = param->ary[i]; if (val < min || val > max) { - filter[i] = 0.0f; + filter[i] = false; } } } -float signed_log10(float x) { - if (fabs(x) < 1e-8) { - return -8.0f; - } - if (x > 0) { - return log10(x); - } - return -log10(-x); -} - void calc_cmap(float* cmap, Hyper* param, float c_min, float c_max, bool log) { if (log) { c_min = signed_log10(c_min); @@ -703,10 +867,32 @@ int main(void) { } } - int hyper_count = 9; - char *hyper_key[9] = { - "agent_steps", "cost", "environment/perf", "environment/score", - "train/learning_rate", "train/gamma", "train/gae_lambda", "train/ent_coef", "train/vf_coef" + int hyper_count = 24; + char *hyper_key[24] = { + "agent_steps", + "cost", + "environment/perf", + "environment/score", + "train/learning_rate", + "train/ent_coef", + "train/gamma", + "train/gae_lambda", + 
"train/vtrace_rho_clip", + "train/vtrace_c_clip", + "train/clip_coef", + "train/vf_clip_coef", + "train/vf_coef", + "train/max_grad_norm", + "train/adam_beta1", + "train/adam_beta2", + "train/adam_eps", + "train/prio_alpha", + "train/prio_beta0", + "train/bptt_horizon", + "train/num_minibatches", + "train/minibatch_size", + "policy/hidden_size", + "env/num_envs", }; //char* items[] = {"environment/score", "cost", "train/learning_rate", "train/gamma", "train/gae_lambda"}; @@ -749,12 +935,20 @@ int main(void) { ClearBackground(PUFF_BACKGROUND); SetTargetFPS(60); + Shader shader = LoadShader(TextFormat("pufferlib/ocean/constellation/point_particle.vs", GLSL_VERSION), + TextFormat("pufferlib/ocean/constellation/point_particle.fs", GLSL_VERSION)); + + // Allows the vertex shader to set the point size of each particle individually + #ifndef GRAPHICS_API_OPENGL_ES2 + glEnable(GL_PROGRAM_POINT_SIZE); + #endif + DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 32, NULL, 255); DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 16, NULL, 255); Camera3D camera = (Camera3D){ 0 }; - camera.position = (Vector3){ 5.0f, 2.0f, 5.0f }; - camera.target = (Vector3){ 0.0f, 0.0f, 0.0f }; + camera.position = (Vector3){ 2.0f, 2.0f, 2.0f }; + camera.target = (Vector3){ 0.5f, 0.5f, 0.5f }; camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; camera.fovy = 45.0f; camera.projection = CAMERA_PERSPECTIVE; @@ -807,8 +1001,6 @@ int main(void) { PlotArgs args4 = DEFAULT_PLOT_ARGS; RenderTexture2D fig4 = LoadRenderTexture(args4.width, args4.height); - float *box_mmin = malloc(hyper_count * sizeof(float)); - float *box_mmax = malloc(hyper_count * sizeof(float)); bool fig4_x_log = true; bool fig4_range1_active = false; int fig4_range1_idx = 2; @@ -826,12 +1018,11 @@ int main(void) { Hyper* x; Hyper* y; Hyper* z; - int num_points; char* x_label; char* y_label; char* z_label; - float *filter = calloc(max_data_points, sizeof(float)); + bool 
*filter = calloc(max_data_points, sizeof(bool)); float *cmap = calloc(max_data_points, sizeof(float)); Vector2 focus = {0, 0}; @@ -850,29 +1041,27 @@ int main(void) { args1.x_label = x_label; args1.y_label = y_label; args1.z_label = z_label; - args1.x_min = hyper_min(&data, hyper_key[fig1_x_idx], 0, data.n); - args1.x_max = hyper_max(&data, hyper_key[fig1_x_idx], 0, data.n); - args1.y_min = hyper_min(&data, hyper_key[fig1_y_idx], 0, data.n); - args1.y_max = hyper_max(&data, hyper_key[fig1_y_idx], 0, data.n); - args1.z_min = hyper_min(&data, hyper_key[fig1_z_idx], 0, data.n); - args1.z_max = hyper_max(&data, hyper_key[fig1_z_idx], 0, data.n); - float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; - float y_mid = fig1_y_log ? (log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; - float z_mid = fig1_z_log ? (log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; - camera.target = (Vector3){x_mid, y_mid, z_mid}; - BeginTextureMode(fig1); - ClearBackground(PUFF_BACKGROUND); - BeginMode3D(camera); - UpdateCamera(&camera, CAMERA_ORBITAL); - int start = 0; int end = data.n; float c_min = 0.0f; float c_max = 1.0f; if (fig1_env_idx != 0) { - start = fig1_env_idx; - end = fig1_env_idx + 1; + start = fig1_env_idx - 1; + end = fig1_env_idx; } + args1.x_min = hyper_min(&data, hyper_key[fig1_x_idx], start, end); + args1.x_max = hyper_max(&data, hyper_key[fig1_x_idx], start, end); + args1.y_min = hyper_min(&data, hyper_key[fig1_y_idx], start, end); + args1.y_max = hyper_max(&data, hyper_key[fig1_y_idx], start, end); + args1.z_min = hyper_min(&data, hyper_key[fig1_z_idx], start, end); + args1.z_max = hyper_max(&data, hyper_key[fig1_z_idx], start, end); + float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; + float y_mid = fig1_y_log ? 
(log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; + float z_mid = fig1_z_log ? (log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; + //camera.target = (Vector3){x_mid, y_mid, z_mid}; + BeginTextureMode(fig1); + ClearBackground(PUFF_BACKGROUND); + if (fig1_color_idx != 0) { c_min = hyper_min(&data, hyper_key[fig1_color_idx - 1], start, end); c_max = hyper_max(&data, hyper_key[fig1_color_idx - 1], start, end); @@ -892,8 +1081,16 @@ int main(void) { cmap[j] = i/(float)data.n; } } - plot3(x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); + //rlSetBlendMode(RL_BLEND_ADDITIVE); + //BeginShaderMode(shader); + plot3(camera, shader, x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); + //EndShaderMode(); + //rlSetBlendMode(RL_BLEND_ALPHA); } + BeginMode3D(camera); + UpdateCamera(&camera, CAMERA_ORBITAL); + + draw_axes3(args1, fig1_x_log, fig1_y_log, fig1_z_log); EndMode3D(); EndTextureMode(); @@ -917,14 +1114,6 @@ int main(void) { y_label = hyper_key[fig2_y_idx]; args2.x_label = x_label; args2.y_label = y_label; - args2.x_min = hyper_min(&data, hyper_key[fig2_x_idx], 0, data.n); - args2.x_max = hyper_max(&data, hyper_key[fig2_x_idx], 0, data.n); - args2.y_min = hyper_min(&data, hyper_key[fig2_y_idx], 0, data.n); - args2.y_max = hyper_max(&data, hyper_key[fig2_y_idx], 0, data.n); - args2.x_min = (fig2_x_log) ? log10(args2.x_min) : args2.x_min; - args2.x_max = (fig2_x_log) ? log10(args2.x_max) : args2.x_max; - args2.y_min = (fig2_y_log) ? log10(args2.y_min) : args2.y_min; - args2.y_max = (fig2_y_log) ? 
log10(args2.y_max) : args2.y_max; BeginTextureMode(fig2); ClearBackground(PUFF_BACKGROUND); @@ -933,15 +1122,28 @@ int main(void) { c_min = 0.0f; c_max = 1.0f; if (fig2_env_idx != 0) { - start = fig2_env_idx; - end = fig2_env_idx + 1; + start = fig2_env_idx - 1; + end = fig2_env_idx; } + + args2.x_min = hyper_min(&data, hyper_key[fig2_x_idx], start, end); + args2.x_max = hyper_max(&data, hyper_key[fig2_x_idx], start, end); + args2.y_min = hyper_min(&data, hyper_key[fig2_y_idx], start, end); + args2.y_max = hyper_max(&data, hyper_key[fig2_y_idx], start, end); + args2.x_min = (fig2_x_log) ? log10(args2.x_min) : args2.x_min; + args2.x_max = (fig2_x_log) ? log10(args2.x_max) : args2.x_max; + args2.y_min = (fig2_y_log) ? log10(args2.y_min) : args2.y_min; + args2.y_max = (fig2_y_log) ? log10(args2.y_max) : args2.y_max; + if (fig2_color_idx != 0) { c_min = hyper_min(&data, hyper_key[fig2_color_idx - 1], start, end); c_max = hyper_max(&data, hyper_key[fig2_color_idx - 1], start, end); } memset(cmap, 0.0f, data.n * sizeof(float)); color_param = NULL; + + //rlSetBlendMode(RL_BLEND_ADDITIVE); + //BeginShaderMode(shader); for (int i=start; in; j++) { - filter[j] = 1.0f; + filter[j] = true; } Hyper* filter_param_1 = get_hyper(&data, env, hyper_key[fig3_range1_idx]); apply_filter(filter, filter_param_1, fig3_range1_min_val, fig3_range1_max_val); Hyper* filter_param_2 = get_hyper(&data, env, hyper_key[fig3_range2_idx]); apply_filter(filter, filter_param_2, fig3_range2_min_val, fig3_range2_max_val); - plot(x, y, false, false, args3, cmap, filter); + plot(shader, x, y, false, false, args3, cmap, filter); } draw_axes(args3); EndTextureMode(); @@ -1021,11 +1226,18 @@ int main(void) { rlSetBlendFactorsSeparate(0x0302, 0x0303, 1, 0x0303, 0x8006, 0x8006); BeginBlendMode(BLEND_CUSTOM_SEPARATE); for (int i=0; ikey, hyper_key[fig4_range1_idx]); + Hyper* filter_param_2 = get_hyper(&data, env->key, hyper_key[fig4_range2_idx]); + for (int j=0; jkey, hyper_key[j]); + for (int k=0; kn; k++) { + 
filter[k] = true; + } + apply_filter(filter, filter_param_1, fig4_range1_min_val, fig4_range1_max_val); + apply_filter(filter, filter_param_2, fig4_range2_min_val, fig4_range2_max_val); + boxplot(hyper, fig4_x_log, j, hyper_count, args4, PUFF_CYAN, filter); + } } EndBlendMode(); draw_box_axes(hyper_key, hyper_count, args4); @@ -1044,6 +1256,7 @@ int main(void) { EndDrawing(); } + UnloadShader(shader); CloseWindow(); return 0; } diff --git a/pufferlib/ocean/constellation/glad.h b/pufferlib/ocean/constellation/glad.h new file mode 100644 index 000000000..20ff05c28 --- /dev/null +++ b/pufferlib/ocean/constellation/glad.h @@ -0,0 +1,2129 @@ +/* + + OpenGL loader generated by glad 0.1.36 on Tue Oct 14 18:01:52 2025. + + Language/Generator: C/C++ + Specification: gl + APIs: gl=3.3 + Profile: core + Extensions: + + Loader: True + Local files: False + Omit khrplatform: False + Reproducible: False + + Commandline: + --profile="core" --api="gl=3.3" --generator="c" --spec="gl" --extensions="" + Online: + https://glad.dav1d.de/#profile=core&language=c&specification=gl&loader=on&api=gl%3D3.3 +*/ + + +#ifndef __glad_h_ +#define __glad_h_ + +#ifdef __gl_h_ +#error OpenGL header already included, remove this include, glad already provides it +#endif +#define __gl_h_ + +#if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) +#define APIENTRY __stdcall +#endif + +#ifndef APIENTRY +#define APIENTRY +#endif +#ifndef APIENTRYP +#define APIENTRYP APIENTRY * +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY APIENTRY +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct gladGLversionStruct { + int major; + int minor; +}; + +typedef void* (* GLADloadproc)(const char *name); + +#ifndef GLAPI +# if defined(GLAD_GLAPI_EXPORT) +# if defined(_WIN32) || defined(__CYGWIN__) +# if defined(GLAD_GLAPI_EXPORT_BUILD) +# if defined(__GNUC__) +# define GLAPI __attribute__ ((dllexport)) extern +# else +# define GLAPI __declspec(dllexport) extern +# endif 
+# else +# if defined(__GNUC__) +# define GLAPI __attribute__ ((dllimport)) extern +# else +# define GLAPI __declspec(dllimport) extern +# endif +# endif +# elif defined(__GNUC__) && defined(GLAD_GLAPI_EXPORT_BUILD) +# define GLAPI __attribute__ ((visibility ("default"))) extern +# else +# define GLAPI extern +# endif +# else +# define GLAPI extern +# endif +#endif + +GLAPI struct gladGLversionStruct GLVersion; + +GLAPI int gladLoadGL(void); + +GLAPI int gladLoadGLLoader(GLADloadproc); + +#include +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef void GLvoid; +typedef khronos_int8_t GLbyte; +typedef khronos_uint8_t GLubyte; +typedef khronos_int16_t GLshort; +typedef khronos_uint16_t GLushort; +typedef int GLint; +typedef unsigned int GLuint; +typedef khronos_int32_t GLclampx; +typedef int GLsizei; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef double GLdouble; +typedef double GLclampd; +typedef void *GLeglClientBufferEXT; +typedef void *GLeglImageOES; +typedef char GLchar; +typedef char GLcharARB; +#ifdef __APPLE__ +typedef void *GLhandleARB; +#else +typedef unsigned int GLhandleARB; +#endif +typedef khronos_uint16_t GLhalf; +typedef khronos_uint16_t GLhalfARB; +typedef khronos_int32_t GLfixed; +typedef khronos_intptr_t GLintptr; +typedef khronos_intptr_t GLintptrARB; +typedef khronos_ssize_t GLsizeiptr; +typedef khronos_ssize_t GLsizeiptrARB; +typedef khronos_int64_t GLint64; +typedef khronos_int64_t GLint64EXT; +typedef khronos_uint64_t GLuint64; +typedef khronos_uint64_t GLuint64EXT; +typedef struct __GLsync *GLsync; +struct _cl_context; +struct _cl_event; +typedef void (APIENTRY *GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (APIENTRY *GLDEBUGPROCARB)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void 
(APIENTRY *GLDEBUGPROCKHR)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,const void *userParam); +typedef void (APIENTRY *GLDEBUGPROCAMD)(GLuint id,GLenum category,GLenum severity,GLsizei length,const GLchar *message,void *userParam); +typedef unsigned short GLhalfNV; +typedef GLintptr GLvdpauSurfaceNV; +typedef void (APIENTRY *GLVULKANPROCNV)(void); +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_NONE 0 +#define GL_FRONT_LEFT 0x0400 +#define GL_FRONT_RIGHT 0x0401 +#define GL_BACK_LEFT 0x0402 +#define GL_BACK_RIGHT 0x0403 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_LEFT 0x0406 +#define GL_RIGHT 0x0407 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_CW 0x0900 +#define GL_CCW 0x0901 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_RANGE 0x0B12 +#define GL_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINE_WIDTH_RANGE 0x0B22 
+#define GL_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_POLYGON_MODE 0x0B40 +#define GL_POLYGON_SMOOTH 0x0B41 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_VIEWPORT 0x0BA2 +#define GL_DITHER 0x0BD0 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_SRC 0x0BE1 +#define GL_BLEND 0x0BE2 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_DRAW_BUFFER 0x0C01 +#define GL_READ_BUFFER 0x0C02 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_DOUBLEBUFFER 0x0C32 +#define GL_STEREO 0x0C33 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_POLYGON_SMOOTH_HINT 0x0C53 +#define GL_UNPACK_SWAP_BYTES 0x0CF0 +#define GL_UNPACK_LSB_FIRST 0x0CF1 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_SWAP_BYTES 0x0D00 +#define GL_PACK_LSB_FIRST 0x0D01 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_TEXTURE_1D 0x0DE0 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_BORDER_COLOR 0x1004 +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 +#define GL_BYTE 0x1400 
+#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_CLEAR 0x1500 +#define GL_AND 0x1501 +#define GL_AND_REVERSE 0x1502 +#define GL_COPY 0x1503 +#define GL_AND_INVERTED 0x1504 +#define GL_NOOP 0x1505 +#define GL_XOR 0x1506 +#define GL_OR 0x1507 +#define GL_NOR 0x1508 +#define GL_EQUIV 0x1509 +#define GL_INVERT 0x150A +#define GL_OR_REVERSE 0x150B +#define GL_COPY_INVERTED 0x150C +#define GL_OR_INVERTED 0x150D +#define GL_NAND 0x150E +#define GL_SET 0x150F +#define GL_TEXTURE 0x1702 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_STENCIL_INDEX 0x1901 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_RED 0x1903 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_REPEAT 0x2901 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_POINT 0x2A01 +#define GL_POLYGON_OFFSET_LINE 0x2A02 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_1D 0x8068 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 
0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_DOUBLE 0x140A +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_2D 0x8064 +#define GL_R3_G3_B2 0x2A10 +#define GL_RGB4 0x804F +#define GL_RGB5 0x8050 +#define GL_RGB8 0x8051 +#define GL_RGB10 0x8052 +#define GL_RGB12 0x8053 +#define GL_RGB16 0x8054 +#define GL_RGBA2 0x8055 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_UNSIGNED_BYTE_3_3_2 0x8032 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_UNSIGNED_INT_10_10_10_2 0x8036 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_PACK_SKIP_IMAGES 0x806B +#define GL_PACK_IMAGE_HEIGHT 0x806C +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define GL_PROXY_TEXTURE_3D 0x8070 +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366 +#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_BGR 0x80E0 +#define GL_BGRA 0x80E1 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 
0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_MULTISAMPLE 0x809D +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_COMPRESSED_RGB 0x84ED +#define GL_COMPRESSED_RGBA 0x84EE +#define GL_TEXTURE_COMPRESSION_HINT 0x84EF +#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_CLAMP_TO_BORDER 0x812D +#define 
GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_DEPTH_COMPONENT32 0x81A7 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_LOD_BIAS 0x8501 +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 +#define GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_BLEND_COLOR 0x8005 +#define GL_BLEND_EQUATION 0x8009 +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_FUNC_ADD 0x8006 +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_FUNC_SUBTRACT 0x800A +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_QUERY_COUNTER_BITS 0x8864 +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA +#define GL_BUFFER_ACCESS 0x88BB +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_DRAW 0x88E0 +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_SAMPLES_PASSED 0x8914 +#define GL_SRC1_ALPHA 0x8589 +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define 
GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642 +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_MAX_VARYING_FLOATS 0x8B4B +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_SHADER_TYPE 0x8B4F +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define 
GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_DELETE_STATUS 0x8B80 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0 +#define GL_LOWER_LEFT 0x8CA1 +#define GL_UPPER_LEFT 0x8CA2 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB_ALPHA 0x8C42 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPRESSED_SRGB 0x8C48 +#define GL_COMPRESSED_SRGB_ALPHA 0x8C49 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_CLIP_DISTANCE0 0x3000 +#define GL_CLIP_DISTANCE1 0x3001 +#define GL_CLIP_DISTANCE2 0x3002 +#define GL_CLIP_DISTANCE3 0x3003 +#define GL_CLIP_DISTANCE4 0x3004 +#define GL_CLIP_DISTANCE5 0x3005 +#define GL_CLIP_DISTANCE6 0x3006 +#define GL_CLIP_DISTANCE7 0x3007 +#define GL_MAX_CLIP_DISTANCES 0x0D32 +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_CONTEXT_FLAGS 0x821E +#define GL_COMPRESSED_RED 0x8225 +#define GL_COMPRESSED_RG 0x8226 +#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x00000001 +#define 
GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_CLAMP_READ_COLOR 0x891C +#define GL_FIXED_ONLY 0x891D +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_1D_ARRAY 0x8C18 +#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19 +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B +#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_PRIMITIVES_GENERATED 0x8C87 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_GREEN_INTEGER 0x8D95 +#define GL_BLUE_INTEGER 0x8D96 +#define 
GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_BGR_INTEGER 0x8D9A +#define GL_BGRA_INTEGER 0x8D9B +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_QUERY_WAIT 0x8E13 +#define GL_QUERY_NO_WAIT 0x8E14 +#define GL_QUERY_BY_REGION_WAIT 0x8E15 +#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_DEPTH_STENCIL 0x84F9 +#define 
GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB +#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_COLOR_ATTACHMENT16 0x8CF0 +#define GL_COLOR_ATTACHMENT17 0x8CF1 +#define GL_COLOR_ATTACHMENT18 0x8CF2 +#define GL_COLOR_ATTACHMENT19 0x8CF3 +#define 
GL_COLOR_ATTACHMENT20 0x8CF4 +#define GL_COLOR_ATTACHMENT21 0x8CF5 +#define GL_COLOR_ATTACHMENT22 0x8CF6 +#define GL_COLOR_ATTACHMENT23 0x8CF7 +#define GL_COLOR_ATTACHMENT24 0x8CF8 +#define GL_COLOR_ATTACHMENT25 0x8CF9 +#define GL_COLOR_ATTACHMENT26 0x8CFA +#define GL_COLOR_ATTACHMENT27 0x8CFB +#define GL_COLOR_ATTACHMENT28 0x8CFC +#define GL_COLOR_ATTACHMENT29 0x8CFD +#define GL_COLOR_ATTACHMENT30 0x8CFE +#define GL_COLOR_ATTACHMENT31 0x8CFF +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_STENCIL_INDEX1 0x8D46 +#define GL_STENCIL_INDEX4 0x8D47 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_INDEX16 0x8D49 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_FRAMEBUFFER_SRGB 0x8DB9 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_COMPRESSED_RED_RGTC1 0x8DBB +#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC +#define GL_COMPRESSED_RG_RGTC2 0x8DBD +#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE +#define GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_R16 0x822A +#define GL_RG8 0x822B +#define GL_RG16 0x822C +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 
0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_SAMPLER_2D_RECT 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64 +#define GL_SAMPLER_BUFFER 0x8DC2 +#define GL_INT_SAMPLER_2D_RECT 0x8DCD +#define GL_INT_SAMPLER_BUFFER 0x8DD0 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8 +#define GL_TEXTURE_BUFFER 0x8C2A +#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B +#define GL_TEXTURE_BINDING_BUFFER 0x8C2C +#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING 0x8C2D +#define GL_TEXTURE_RECTANGLE 0x84F5 +#define GL_TEXTURE_BINDING_RECTANGLE 0x84F6 +#define GL_PROXY_TEXTURE_RECTANGLE 0x84F7 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE 0x84F8 +#define GL_R8_SNORM 0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_R16_SNORM 0x8F98 +#define GL_RG16_SNORM 0x8F99 +#define GL_RGB16_SNORM 0x8F9A +#define GL_RGBA16_SNORM 0x8F9B +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART 0x8F9D +#define GL_PRIMITIVE_RESTART_INDEX 0x8F9E +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define 
GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFF +#define GL_CONTEXT_CORE_PROFILE_BIT 0x00000001 +#define GL_CONTEXT_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define GL_LINES_ADJACENCY 0x000A +#define GL_LINE_STRIP_ADJACENCY 0x000B +#define GL_TRIANGLES_ADJACENCY 0x000C +#define GL_TRIANGLE_STRIP_ADJACENCY 0x000D +#define GL_PROGRAM_POINT_SIZE 0x8642 +#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS 0x8C29 +#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED 0x8DA7 +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS 0x8DA8 +#define GL_GEOMETRY_SHADER 0x8DD9 +#define GL_GEOMETRY_VERTICES_OUT 0x8916 +#define GL_GEOMETRY_INPUT_TYPE 0x8917 +#define GL_GEOMETRY_OUTPUT_TYPE 0x8918 +#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS 0x8DDF +#define GL_MAX_GEOMETRY_OUTPUT_VERTICES 0x8DE0 +#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS 0x8DE1 +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_GEOMETRY_INPUT_COMPONENTS 0x9123 +#define GL_MAX_GEOMETRY_OUTPUT_COMPONENTS 0x9124 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_CONTEXT_PROFILE_MASK 0x9126 +#define GL_DEPTH_CLAMP 0x864F +#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C +#define GL_FIRST_VERTEX_CONVENTION 0x8E4D +#define 
GL_LAST_VERTEX_CONVENTION 0x8E4E +#define GL_PROVOKING_VERTEX 0x8E4F +#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFF +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SAMPLE_POSITION 0x8E50 +#define GL_SAMPLE_MASK 0x8E51 +#define GL_SAMPLE_MASK_VALUE 0x8E52 +#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59 +#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE 0x9101 +#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 +#define GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9103 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY 0x9105 +#define GL_TEXTURE_SAMPLES 0x9106 +#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A +#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B +#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D +#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E +#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F +#define GL_MAX_INTEGER_SAMPLES 0x9110 +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_SRC1_COLOR 0x88F9 +#define GL_ONE_MINUS_SRC1_COLOR 0x88FA +#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB +#define GL_MAX_DUAL_SOURCE_DRAW_BUFFERS 0x88FC +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 
+#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46 +#define GL_TIME_ELAPSED 0x88BF +#define GL_TIMESTAMP 0x8E28 +#define GL_INT_2_10_10_10_REV 0x8D9F +#ifndef GL_VERSION_1_0 +#define GL_VERSION_1_0 1 +GLAPI int GLAD_GL_VERSION_1_0; +typedef void (APIENTRYP PFNGLCULLFACEPROC)(GLenum mode); +GLAPI PFNGLCULLFACEPROC glad_glCullFace; +#define glCullFace glad_glCullFace +typedef void (APIENTRYP PFNGLFRONTFACEPROC)(GLenum mode); +GLAPI PFNGLFRONTFACEPROC glad_glFrontFace; +#define glFrontFace glad_glFrontFace +typedef void (APIENTRYP PFNGLHINTPROC)(GLenum target, GLenum mode); +GLAPI PFNGLHINTPROC glad_glHint; +#define glHint glad_glHint +typedef void (APIENTRYP PFNGLLINEWIDTHPROC)(GLfloat width); +GLAPI PFNGLLINEWIDTHPROC glad_glLineWidth; +#define glLineWidth glad_glLineWidth +typedef void (APIENTRYP PFNGLPOINTSIZEPROC)(GLfloat size); +GLAPI PFNGLPOINTSIZEPROC glad_glPointSize; +#define glPointSize glad_glPointSize +typedef void (APIENTRYP PFNGLPOLYGONMODEPROC)(GLenum face, GLenum mode); +GLAPI PFNGLPOLYGONMODEPROC glad_glPolygonMode; +#define glPolygonMode glad_glPolygonMode +typedef void (APIENTRYP PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI PFNGLSCISSORPROC glad_glScissor; +#define glScissor glad_glScissor +typedef void (APIENTRYP PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param); +GLAPI PFNGLTEXPARAMETERFPROC glad_glTexParameterf; +#define glTexParameterf glad_glTexParameterf +typedef void (APIENTRYP PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat *params); +GLAPI PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv; +#define glTexParameterfv glad_glTexParameterfv +typedef void (APIENTRYP PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); +GLAPI PFNGLTEXPARAMETERIPROC glad_glTexParameteri; +#define glTexParameteri glad_glTexParameteri +typedef void (APIENTRYP PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint 
*params); +GLAPI PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv; +#define glTexParameteriv glad_glTexParameteriv +typedef void (APIENTRYP PFNGLTEXIMAGE1DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void *pixels); +GLAPI PFNGLTEXIMAGE1DPROC glad_glTexImage1D; +#define glTexImage1D glad_glTexImage1D +typedef void (APIENTRYP PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels); +GLAPI PFNGLTEXIMAGE2DPROC glad_glTexImage2D; +#define glTexImage2D glad_glTexImage2D +typedef void (APIENTRYP PFNGLDRAWBUFFERPROC)(GLenum buf); +GLAPI PFNGLDRAWBUFFERPROC glad_glDrawBuffer; +#define glDrawBuffer glad_glDrawBuffer +typedef void (APIENTRYP PFNGLCLEARPROC)(GLbitfield mask); +GLAPI PFNGLCLEARPROC glad_glClear; +#define glClear glad_glClear +typedef void (APIENTRYP PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GLAPI PFNGLCLEARCOLORPROC glad_glClearColor; +#define glClearColor glad_glClearColor +typedef void (APIENTRYP PFNGLCLEARSTENCILPROC)(GLint s); +GLAPI PFNGLCLEARSTENCILPROC glad_glClearStencil; +#define glClearStencil glad_glClearStencil +typedef void (APIENTRYP PFNGLCLEARDEPTHPROC)(GLdouble depth); +GLAPI PFNGLCLEARDEPTHPROC glad_glClearDepth; +#define glClearDepth glad_glClearDepth +typedef void (APIENTRYP PFNGLSTENCILMASKPROC)(GLuint mask); +GLAPI PFNGLSTENCILMASKPROC glad_glStencilMask; +#define glStencilMask glad_glStencilMask +typedef void (APIENTRYP PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GLAPI PFNGLCOLORMASKPROC glad_glColorMask; +#define glColorMask glad_glColorMask +typedef void (APIENTRYP PFNGLDEPTHMASKPROC)(GLboolean flag); +GLAPI PFNGLDEPTHMASKPROC glad_glDepthMask; +#define glDepthMask glad_glDepthMask +typedef void (APIENTRYP PFNGLDISABLEPROC)(GLenum cap); +GLAPI PFNGLDISABLEPROC glad_glDisable; 
+#define glDisable glad_glDisable +typedef void (APIENTRYP PFNGLENABLEPROC)(GLenum cap); +GLAPI PFNGLENABLEPROC glad_glEnable; +#define glEnable glad_glEnable +typedef void (APIENTRYP PFNGLFINISHPROC)(void); +GLAPI PFNGLFINISHPROC glad_glFinish; +#define glFinish glad_glFinish +typedef void (APIENTRYP PFNGLFLUSHPROC)(void); +GLAPI PFNGLFLUSHPROC glad_glFlush; +#define glFlush glad_glFlush +typedef void (APIENTRYP PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor); +GLAPI PFNGLBLENDFUNCPROC glad_glBlendFunc; +#define glBlendFunc glad_glBlendFunc +typedef void (APIENTRYP PFNGLLOGICOPPROC)(GLenum opcode); +GLAPI PFNGLLOGICOPPROC glad_glLogicOp; +#define glLogicOp glad_glLogicOp +typedef void (APIENTRYP PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask); +GLAPI PFNGLSTENCILFUNCPROC glad_glStencilFunc; +#define glStencilFunc glad_glStencilFunc +typedef void (APIENTRYP PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass); +GLAPI PFNGLSTENCILOPPROC glad_glStencilOp; +#define glStencilOp glad_glStencilOp +typedef void (APIENTRYP PFNGLDEPTHFUNCPROC)(GLenum func); +GLAPI PFNGLDEPTHFUNCPROC glad_glDepthFunc; +#define glDepthFunc glad_glDepthFunc +typedef void (APIENTRYP PFNGLPIXELSTOREFPROC)(GLenum pname, GLfloat param); +GLAPI PFNGLPIXELSTOREFPROC glad_glPixelStoref; +#define glPixelStoref glad_glPixelStoref +typedef void (APIENTRYP PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param); +GLAPI PFNGLPIXELSTOREIPROC glad_glPixelStorei; +#define glPixelStorei glad_glPixelStorei +typedef void (APIENTRYP PFNGLREADBUFFERPROC)(GLenum src); +GLAPI PFNGLREADBUFFERPROC glad_glReadBuffer; +#define glReadBuffer glad_glReadBuffer +typedef void (APIENTRYP PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void *pixels); +GLAPI PFNGLREADPIXELSPROC glad_glReadPixels; +#define glReadPixels glad_glReadPixels +typedef void (APIENTRYP PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean *data); +GLAPI PFNGLGETBOOLEANVPROC glad_glGetBooleanv; 
+#define glGetBooleanv glad_glGetBooleanv +typedef void (APIENTRYP PFNGLGETDOUBLEVPROC)(GLenum pname, GLdouble *data); +GLAPI PFNGLGETDOUBLEVPROC glad_glGetDoublev; +#define glGetDoublev glad_glGetDoublev +typedef GLenum (APIENTRYP PFNGLGETERRORPROC)(void); +GLAPI PFNGLGETERRORPROC glad_glGetError; +#define glGetError glad_glGetError +typedef void (APIENTRYP PFNGLGETFLOATVPROC)(GLenum pname, GLfloat *data); +GLAPI PFNGLGETFLOATVPROC glad_glGetFloatv; +#define glGetFloatv glad_glGetFloatv +typedef void (APIENTRYP PFNGLGETINTEGERVPROC)(GLenum pname, GLint *data); +GLAPI PFNGLGETINTEGERVPROC glad_glGetIntegerv; +#define glGetIntegerv glad_glGetIntegerv +typedef const GLubyte * (APIENTRYP PFNGLGETSTRINGPROC)(GLenum name); +GLAPI PFNGLGETSTRINGPROC glad_glGetString; +#define glGetString glad_glGetString +typedef void (APIENTRYP PFNGLGETTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, void *pixels); +GLAPI PFNGLGETTEXIMAGEPROC glad_glGetTexImage; +#define glGetTexImage glad_glGetTexImage +typedef void (APIENTRYP PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat *params); +GLAPI PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv; +#define glGetTexParameterfv glad_glGetTexParameterfv +typedef void (APIENTRYP PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint *params); +GLAPI PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv; +#define glGetTexParameteriv glad_glGetTexParameteriv +typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERFVPROC)(GLenum target, GLint level, GLenum pname, GLfloat *params); +GLAPI PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv; +#define glGetTexLevelParameterfv glad_glGetTexLevelParameterfv +typedef void (APIENTRYP PFNGLGETTEXLEVELPARAMETERIVPROC)(GLenum target, GLint level, GLenum pname, GLint *params); +GLAPI PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv; +#define glGetTexLevelParameteriv glad_glGetTexLevelParameteriv +typedef GLboolean (APIENTRYP 
PFNGLISENABLEDPROC)(GLenum cap); +GLAPI PFNGLISENABLEDPROC glad_glIsEnabled; +#define glIsEnabled glad_glIsEnabled +typedef void (APIENTRYP PFNGLDEPTHRANGEPROC)(GLdouble n, GLdouble f); +GLAPI PFNGLDEPTHRANGEPROC glad_glDepthRange; +#define glDepthRange glad_glDepthRange +typedef void (APIENTRYP PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI PFNGLVIEWPORTPROC glad_glViewport; +#define glViewport glad_glViewport +#endif +#ifndef GL_VERSION_1_1 +#define GL_VERSION_1_1 1 +GLAPI int GLAD_GL_VERSION_1_1; +typedef void (APIENTRYP PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count); +GLAPI PFNGLDRAWARRAYSPROC glad_glDrawArrays; +#define glDrawArrays glad_glDrawArrays +typedef void (APIENTRYP PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices); +GLAPI PFNGLDRAWELEMENTSPROC glad_glDrawElements; +#define glDrawElements glad_glDrawElements +typedef void (APIENTRYP PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units); +GLAPI PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset; +#define glPolygonOffset glad_glPolygonOffset +typedef void (APIENTRYP PFNGLCOPYTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +GLAPI PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D; +#define glCopyTexImage1D glad_glCopyTexImage1D +typedef void (APIENTRYP PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GLAPI PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D; +#define glCopyTexImage2D glad_glCopyTexImage2D +typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +GLAPI PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D; +#define glCopyTexSubImage1D glad_glCopyTexSubImage1D +typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, 
GLsizei height); +GLAPI PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D; +#define glCopyTexSubImage2D glad_glCopyTexSubImage2D +typedef void (APIENTRYP PFNGLTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void *pixels); +GLAPI PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D; +#define glTexSubImage1D glad_glTexSubImage1D +typedef void (APIENTRYP PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +GLAPI PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D; +#define glTexSubImage2D glad_glTexSubImage2D +typedef void (APIENTRYP PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture); +GLAPI PFNGLBINDTEXTUREPROC glad_glBindTexture; +#define glBindTexture glad_glBindTexture +typedef void (APIENTRYP PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint *textures); +GLAPI PFNGLDELETETEXTURESPROC glad_glDeleteTextures; +#define glDeleteTextures glad_glDeleteTextures +typedef void (APIENTRYP PFNGLGENTEXTURESPROC)(GLsizei n, GLuint *textures); +GLAPI PFNGLGENTEXTURESPROC glad_glGenTextures; +#define glGenTextures glad_glGenTextures +typedef GLboolean (APIENTRYP PFNGLISTEXTUREPROC)(GLuint texture); +GLAPI PFNGLISTEXTUREPROC glad_glIsTexture; +#define glIsTexture glad_glIsTexture +#endif +#ifndef GL_VERSION_1_2 +#define GL_VERSION_1_2 1 +GLAPI int GLAD_GL_VERSION_1_2; +typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices); +GLAPI PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements; +#define glDrawRangeElements glad_glDrawRangeElements +typedef void (APIENTRYP PFNGLTEXIMAGE3DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels); +GLAPI PFNGLTEXIMAGE3DPROC glad_glTexImage3D; +#define glTexImage3D glad_glTexImage3D +typedef void 
(APIENTRYP PFNGLTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +GLAPI PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D; +#define glTexSubImage3D glad_glTexSubImage3D +typedef void (APIENTRYP PFNGLCOPYTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D; +#define glCopyTexSubImage3D glad_glCopyTexSubImage3D +#endif +#ifndef GL_VERSION_1_3 +#define GL_VERSION_1_3 1 +GLAPI int GLAD_GL_VERSION_1_3; +typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC)(GLenum texture); +GLAPI PFNGLACTIVETEXTUREPROC glad_glActiveTexture; +#define glActiveTexture glad_glActiveTexture +typedef void (APIENTRYP PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert); +GLAPI PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage; +#define glSampleCoverage glad_glSampleCoverage +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void *data); +GLAPI PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D; +#define glCompressedTexImage3D glad_glCompressedTexImage3D +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data); +GLAPI PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D; +#define glCompressedTexImage2D glad_glCompressedTexImage2D +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void *data); +GLAPI PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D; +#define glCompressedTexImage1D glad_glCompressedTexImage1D +typedef void 
(APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +GLAPI PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D; +#define glCompressedTexSubImage3D glad_glCompressedTexSubImage3D +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +GLAPI PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D; +#define glCompressedTexSubImage2D glad_glCompressedTexSubImage2D +typedef void (APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void *data); +GLAPI PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D; +#define glCompressedTexSubImage1D glad_glCompressedTexSubImage1D +typedef void (APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint level, void *img); +GLAPI PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage; +#define glGetCompressedTexImage glad_glGetCompressedTexImage +#endif +#ifndef GL_VERSION_1_4 +#define GL_VERSION_1_4 1 +GLAPI int GLAD_GL_VERSION_1_4; +typedef void (APIENTRYP PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +GLAPI PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate; +#define glBlendFuncSeparate glad_glBlendFuncSeparate +typedef void (APIENTRYP PFNGLMULTIDRAWARRAYSPROC)(GLenum mode, const GLint *first, const GLsizei *count, GLsizei drawcount); +GLAPI PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays; +#define glMultiDrawArrays glad_glMultiDrawArrays +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSPROC)(GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount); +GLAPI PFNGLMULTIDRAWELEMENTSPROC 
glad_glMultiDrawElements; +#define glMultiDrawElements glad_glMultiDrawElements +typedef void (APIENTRYP PFNGLPOINTPARAMETERFPROC)(GLenum pname, GLfloat param); +GLAPI PFNGLPOINTPARAMETERFPROC glad_glPointParameterf; +#define glPointParameterf glad_glPointParameterf +typedef void (APIENTRYP PFNGLPOINTPARAMETERFVPROC)(GLenum pname, const GLfloat *params); +GLAPI PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv; +#define glPointParameterfv glad_glPointParameterfv +typedef void (APIENTRYP PFNGLPOINTPARAMETERIPROC)(GLenum pname, GLint param); +GLAPI PFNGLPOINTPARAMETERIPROC glad_glPointParameteri; +#define glPointParameteri glad_glPointParameteri +typedef void (APIENTRYP PFNGLPOINTPARAMETERIVPROC)(GLenum pname, const GLint *params); +GLAPI PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv; +#define glPointParameteriv glad_glPointParameteriv +typedef void (APIENTRYP PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GLAPI PFNGLBLENDCOLORPROC glad_glBlendColor; +#define glBlendColor glad_glBlendColor +typedef void (APIENTRYP PFNGLBLENDEQUATIONPROC)(GLenum mode); +GLAPI PFNGLBLENDEQUATIONPROC glad_glBlendEquation; +#define glBlendEquation glad_glBlendEquation +#endif +#ifndef GL_VERSION_1_5 +#define GL_VERSION_1_5 1 +GLAPI int GLAD_GL_VERSION_1_5; +typedef void (APIENTRYP PFNGLGENQUERIESPROC)(GLsizei n, GLuint *ids); +GLAPI PFNGLGENQUERIESPROC glad_glGenQueries; +#define glGenQueries glad_glGenQueries +typedef void (APIENTRYP PFNGLDELETEQUERIESPROC)(GLsizei n, const GLuint *ids); +GLAPI PFNGLDELETEQUERIESPROC glad_glDeleteQueries; +#define glDeleteQueries glad_glDeleteQueries +typedef GLboolean (APIENTRYP PFNGLISQUERYPROC)(GLuint id); +GLAPI PFNGLISQUERYPROC glad_glIsQuery; +#define glIsQuery glad_glIsQuery +typedef void (APIENTRYP PFNGLBEGINQUERYPROC)(GLenum target, GLuint id); +GLAPI PFNGLBEGINQUERYPROC glad_glBeginQuery; +#define glBeginQuery glad_glBeginQuery +typedef void (APIENTRYP PFNGLENDQUERYPROC)(GLenum target); +GLAPI 
PFNGLENDQUERYPROC glad_glEndQuery; +#define glEndQuery glad_glEndQuery +typedef void (APIENTRYP PFNGLGETQUERYIVPROC)(GLenum target, GLenum pname, GLint *params); +GLAPI PFNGLGETQUERYIVPROC glad_glGetQueryiv; +#define glGetQueryiv glad_glGetQueryiv +typedef void (APIENTRYP PFNGLGETQUERYOBJECTIVPROC)(GLuint id, GLenum pname, GLint *params); +GLAPI PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv; +#define glGetQueryObjectiv glad_glGetQueryObjectiv +typedef void (APIENTRYP PFNGLGETQUERYOBJECTUIVPROC)(GLuint id, GLenum pname, GLuint *params); +GLAPI PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv; +#define glGetQueryObjectuiv glad_glGetQueryObjectuiv +typedef void (APIENTRYP PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer); +GLAPI PFNGLBINDBUFFERPROC glad_glBindBuffer; +#define glBindBuffer glad_glBindBuffer +typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint *buffers); +GLAPI PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers; +#define glDeleteBuffers glad_glDeleteBuffers +typedef void (APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei n, GLuint *buffers); +GLAPI PFNGLGENBUFFERSPROC glad_glGenBuffers; +#define glGenBuffers glad_glGenBuffers +typedef GLboolean (APIENTRYP PFNGLISBUFFERPROC)(GLuint buffer); +GLAPI PFNGLISBUFFERPROC glad_glIsBuffer; +#define glIsBuffer glad_glIsBuffer +typedef void (APIENTRYP PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void *data, GLenum usage); +GLAPI PFNGLBUFFERDATAPROC glad_glBufferData; +#define glBufferData glad_glBufferData +typedef void (APIENTRYP PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void *data); +GLAPI PFNGLBUFFERSUBDATAPROC glad_glBufferSubData; +#define glBufferSubData glad_glBufferSubData +typedef void (APIENTRYP PFNGLGETBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, void *data); +GLAPI PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData; +#define glGetBufferSubData glad_glGetBufferSubData +typedef void * (APIENTRYP PFNGLMAPBUFFERPROC)(GLenum 
target, GLenum access); +GLAPI PFNGLMAPBUFFERPROC glad_glMapBuffer; +#define glMapBuffer glad_glMapBuffer +typedef GLboolean (APIENTRYP PFNGLUNMAPBUFFERPROC)(GLenum target); +GLAPI PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer; +#define glUnmapBuffer glad_glUnmapBuffer +typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint *params); +GLAPI PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv; +#define glGetBufferParameteriv glad_glGetBufferParameteriv +typedef void (APIENTRYP PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void **params); +GLAPI PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv; +#define glGetBufferPointerv glad_glGetBufferPointerv +#endif +#ifndef GL_VERSION_2_0 +#define GL_VERSION_2_0 1 +GLAPI int GLAD_GL_VERSION_2_0; +typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha); +GLAPI PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate; +#define glBlendEquationSeparate glad_glBlendEquationSeparate +typedef void (APIENTRYP PFNGLDRAWBUFFERSPROC)(GLsizei n, const GLenum *bufs); +GLAPI PFNGLDRAWBUFFERSPROC glad_glDrawBuffers; +#define glDrawBuffers glad_glDrawBuffers +typedef void (APIENTRYP PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +GLAPI PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate; +#define glStencilOpSeparate glad_glStencilOpSeparate +typedef void (APIENTRYP PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask); +GLAPI PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate; +#define glStencilFuncSeparate glad_glStencilFuncSeparate +typedef void (APIENTRYP PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask); +GLAPI PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate; +#define glStencilMaskSeparate glad_glStencilMaskSeparate +typedef void (APIENTRYP PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader); +GLAPI PFNGLATTACHSHADERPROC glad_glAttachShader; +#define glAttachShader 
glad_glAttachShader +typedef void (APIENTRYP PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar *name); +GLAPI PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation; +#define glBindAttribLocation glad_glBindAttribLocation +typedef void (APIENTRYP PFNGLCOMPILESHADERPROC)(GLuint shader); +GLAPI PFNGLCOMPILESHADERPROC glad_glCompileShader; +#define glCompileShader glad_glCompileShader +typedef GLuint (APIENTRYP PFNGLCREATEPROGRAMPROC)(void); +GLAPI PFNGLCREATEPROGRAMPROC glad_glCreateProgram; +#define glCreateProgram glad_glCreateProgram +typedef GLuint (APIENTRYP PFNGLCREATESHADERPROC)(GLenum type); +GLAPI PFNGLCREATESHADERPROC glad_glCreateShader; +#define glCreateShader glad_glCreateShader +typedef void (APIENTRYP PFNGLDELETEPROGRAMPROC)(GLuint program); +GLAPI PFNGLDELETEPROGRAMPROC glad_glDeleteProgram; +#define glDeleteProgram glad_glDeleteProgram +typedef void (APIENTRYP PFNGLDELETESHADERPROC)(GLuint shader); +GLAPI PFNGLDELETESHADERPROC glad_glDeleteShader; +#define glDeleteShader glad_glDeleteShader +typedef void (APIENTRYP PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader); +GLAPI PFNGLDETACHSHADERPROC glad_glDetachShader; +#define glDetachShader glad_glDetachShader +typedef void (APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index); +GLAPI PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray; +#define glDisableVertexAttribArray glad_glDisableVertexAttribArray +typedef void (APIENTRYP PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index); +GLAPI PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray; +#define glEnableVertexAttribArray glad_glEnableVertexAttribArray +typedef void (APIENTRYP PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GLAPI PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib; +#define glGetActiveAttrib glad_glGetActiveAttrib +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei 
bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GLAPI PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform; +#define glGetActiveUniform glad_glGetActiveUniform +typedef void (APIENTRYP PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders); +GLAPI PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders; +#define glGetAttachedShaders glad_glGetAttachedShaders +typedef GLint (APIENTRYP PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar *name); +GLAPI PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation; +#define glGetAttribLocation glad_glGetAttribLocation +typedef void (APIENTRYP PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint *params); +GLAPI PFNGLGETPROGRAMIVPROC glad_glGetProgramiv; +#define glGetProgramiv glad_glGetProgramiv +typedef void (APIENTRYP PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GLAPI PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog; +#define glGetProgramInfoLog glad_glGetProgramInfoLog +typedef void (APIENTRYP PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint *params); +GLAPI PFNGLGETSHADERIVPROC glad_glGetShaderiv; +#define glGetShaderiv glad_glGetShaderiv +typedef void (APIENTRYP PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GLAPI PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog; +#define glGetShaderInfoLog glad_glGetShaderInfoLog +typedef void (APIENTRYP PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source); +GLAPI PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource; +#define glGetShaderSource glad_glGetShaderSource +typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar *name); +GLAPI PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation; +#define glGetUniformLocation glad_glGetUniformLocation +typedef void (APIENTRYP PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat *params); +GLAPI 
PFNGLGETUNIFORMFVPROC glad_glGetUniformfv; +#define glGetUniformfv glad_glGetUniformfv +typedef void (APIENTRYP PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint *params); +GLAPI PFNGLGETUNIFORMIVPROC glad_glGetUniformiv; +#define glGetUniformiv glad_glGetUniformiv +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBDVPROC)(GLuint index, GLenum pname, GLdouble *params); +GLAPI PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv; +#define glGetVertexAttribdv glad_glGetVertexAttribdv +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat *params); +GLAPI PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv; +#define glGetVertexAttribfv glad_glGetVertexAttribfv +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint *params); +GLAPI PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv; +#define glGetVertexAttribiv glad_glGetVertexAttribiv +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void **pointer); +GLAPI PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv; +#define glGetVertexAttribPointerv glad_glGetVertexAttribPointerv +typedef GLboolean (APIENTRYP PFNGLISPROGRAMPROC)(GLuint program); +GLAPI PFNGLISPROGRAMPROC glad_glIsProgram; +#define glIsProgram glad_glIsProgram +typedef GLboolean (APIENTRYP PFNGLISSHADERPROC)(GLuint shader); +GLAPI PFNGLISSHADERPROC glad_glIsShader; +#define glIsShader glad_glIsShader +typedef void (APIENTRYP PFNGLLINKPROGRAMPROC)(GLuint program); +GLAPI PFNGLLINKPROGRAMPROC glad_glLinkProgram; +#define glLinkProgram glad_glLinkProgram +typedef void (APIENTRYP PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar *const*string, const GLint *length); +GLAPI PFNGLSHADERSOURCEPROC glad_glShaderSource; +#define glShaderSource glad_glShaderSource +typedef void (APIENTRYP PFNGLUSEPROGRAMPROC)(GLuint program); +GLAPI PFNGLUSEPROGRAMPROC glad_glUseProgram; +#define glUseProgram glad_glUseProgram +typedef void (APIENTRYP 
PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0); +GLAPI PFNGLUNIFORM1FPROC glad_glUniform1f; +#define glUniform1f glad_glUniform1f +typedef void (APIENTRYP PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1); +GLAPI PFNGLUNIFORM2FPROC glad_glUniform2f; +#define glUniform2f glad_glUniform2f +typedef void (APIENTRYP PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GLAPI PFNGLUNIFORM3FPROC glad_glUniform3f; +#define glUniform3f glad_glUniform3f +typedef void (APIENTRYP PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GLAPI PFNGLUNIFORM4FPROC glad_glUniform4f; +#define glUniform4f glad_glUniform4f +typedef void (APIENTRYP PFNGLUNIFORM1IPROC)(GLint location, GLint v0); +GLAPI PFNGLUNIFORM1IPROC glad_glUniform1i; +#define glUniform1i glad_glUniform1i +typedef void (APIENTRYP PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1); +GLAPI PFNGLUNIFORM2IPROC glad_glUniform2i; +#define glUniform2i glad_glUniform2i +typedef void (APIENTRYP PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2); +GLAPI PFNGLUNIFORM3IPROC glad_glUniform3i; +#define glUniform3i glad_glUniform3i +typedef void (APIENTRYP PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GLAPI PFNGLUNIFORM4IPROC glad_glUniform4i; +#define glUniform4i glad_glUniform4i +typedef void (APIENTRYP PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat *value); +GLAPI PFNGLUNIFORM1FVPROC glad_glUniform1fv; +#define glUniform1fv glad_glUniform1fv +typedef void (APIENTRYP PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat *value); +GLAPI PFNGLUNIFORM2FVPROC glad_glUniform2fv; +#define glUniform2fv glad_glUniform2fv +typedef void (APIENTRYP PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat *value); +GLAPI PFNGLUNIFORM3FVPROC glad_glUniform3fv; +#define glUniform3fv glad_glUniform3fv +typedef void (APIENTRYP PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat 
*value); +GLAPI PFNGLUNIFORM4FVPROC glad_glUniform4fv; +#define glUniform4fv glad_glUniform4fv +typedef void (APIENTRYP PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint *value); +GLAPI PFNGLUNIFORM1IVPROC glad_glUniform1iv; +#define glUniform1iv glad_glUniform1iv +typedef void (APIENTRYP PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint *value); +GLAPI PFNGLUNIFORM2IVPROC glad_glUniform2iv; +#define glUniform2iv glad_glUniform2iv +typedef void (APIENTRYP PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint *value); +GLAPI PFNGLUNIFORM3IVPROC glad_glUniform3iv; +#define glUniform3iv glad_glUniform3iv +typedef void (APIENTRYP PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint *value); +GLAPI PFNGLUNIFORM4IVPROC glad_glUniform4iv; +#define glUniform4iv glad_glUniform4iv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv; +#define glUniformMatrix2fv glad_glUniformMatrix2fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv; +#define glUniformMatrix3fv glad_glUniformMatrix3fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv; +#define glUniformMatrix4fv glad_glUniformMatrix4fv +typedef void (APIENTRYP PFNGLVALIDATEPROGRAMPROC)(GLuint program); +GLAPI PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram; +#define glValidateProgram glad_glValidateProgram +typedef void (APIENTRYP PFNGLVERTEXATTRIB1DPROC)(GLuint index, GLdouble x); +GLAPI PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d; +#define glVertexAttrib1d glad_glVertexAttrib1d +typedef void (APIENTRYP PFNGLVERTEXATTRIB1DVPROC)(GLuint index, const GLdouble *v); +GLAPI 
PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv; +#define glVertexAttrib1dv glad_glVertexAttrib1dv +typedef void (APIENTRYP PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x); +GLAPI PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f; +#define glVertexAttrib1f glad_glVertexAttrib1f +typedef void (APIENTRYP PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat *v); +GLAPI PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv; +#define glVertexAttrib1fv glad_glVertexAttrib1fv +typedef void (APIENTRYP PFNGLVERTEXATTRIB1SPROC)(GLuint index, GLshort x); +GLAPI PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s; +#define glVertexAttrib1s glad_glVertexAttrib1s +typedef void (APIENTRYP PFNGLVERTEXATTRIB1SVPROC)(GLuint index, const GLshort *v); +GLAPI PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv; +#define glVertexAttrib1sv glad_glVertexAttrib1sv +typedef void (APIENTRYP PFNGLVERTEXATTRIB2DPROC)(GLuint index, GLdouble x, GLdouble y); +GLAPI PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d; +#define glVertexAttrib2d glad_glVertexAttrib2d +typedef void (APIENTRYP PFNGLVERTEXATTRIB2DVPROC)(GLuint index, const GLdouble *v); +GLAPI PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv; +#define glVertexAttrib2dv glad_glVertexAttrib2dv +typedef void (APIENTRYP PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y); +GLAPI PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f; +#define glVertexAttrib2f glad_glVertexAttrib2f +typedef void (APIENTRYP PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat *v); +GLAPI PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv; +#define glVertexAttrib2fv glad_glVertexAttrib2fv +typedef void (APIENTRYP PFNGLVERTEXATTRIB2SPROC)(GLuint index, GLshort x, GLshort y); +GLAPI PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s; +#define glVertexAttrib2s glad_glVertexAttrib2s +typedef void (APIENTRYP PFNGLVERTEXATTRIB2SVPROC)(GLuint index, const GLshort *v); +GLAPI PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv; +#define glVertexAttrib2sv glad_glVertexAttrib2sv +typedef void (APIENTRYP 
PFNGLVERTEXATTRIB3DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z); +GLAPI PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d; +#define glVertexAttrib3d glad_glVertexAttrib3d +typedef void (APIENTRYP PFNGLVERTEXATTRIB3DVPROC)(GLuint index, const GLdouble *v); +GLAPI PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv; +#define glVertexAttrib3dv glad_glVertexAttrib3dv +typedef void (APIENTRYP PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z); +GLAPI PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f; +#define glVertexAttrib3f glad_glVertexAttrib3f +typedef void (APIENTRYP PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat *v); +GLAPI PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv; +#define glVertexAttrib3fv glad_glVertexAttrib3fv +typedef void (APIENTRYP PFNGLVERTEXATTRIB3SPROC)(GLuint index, GLshort x, GLshort y, GLshort z); +GLAPI PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s; +#define glVertexAttrib3s glad_glVertexAttrib3s +typedef void (APIENTRYP PFNGLVERTEXATTRIB3SVPROC)(GLuint index, const GLshort *v); +GLAPI PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv; +#define glVertexAttrib3sv glad_glVertexAttrib3sv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NBVPROC)(GLuint index, const GLbyte *v); +GLAPI PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv; +#define glVertexAttrib4Nbv glad_glVertexAttrib4Nbv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NIVPROC)(GLuint index, const GLint *v); +GLAPI PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv; +#define glVertexAttrib4Niv glad_glVertexAttrib4Niv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NSVPROC)(GLuint index, const GLshort *v); +GLAPI PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv; +#define glVertexAttrib4Nsv glad_glVertexAttrib4Nsv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUBPROC)(GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +GLAPI PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub; +#define glVertexAttrib4Nub glad_glVertexAttrib4Nub +typedef void (APIENTRYP 
PFNGLVERTEXATTRIB4NUBVPROC)(GLuint index, const GLubyte *v); +GLAPI PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv; +#define glVertexAttrib4Nubv glad_glVertexAttrib4Nubv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUIVPROC)(GLuint index, const GLuint *v); +GLAPI PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv; +#define glVertexAttrib4Nuiv glad_glVertexAttrib4Nuiv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4NUSVPROC)(GLuint index, const GLushort *v); +GLAPI PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv; +#define glVertexAttrib4Nusv glad_glVertexAttrib4Nusv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4BVPROC)(GLuint index, const GLbyte *v); +GLAPI PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv; +#define glVertexAttrib4bv glad_glVertexAttrib4bv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +GLAPI PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d; +#define glVertexAttrib4d glad_glVertexAttrib4d +typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVPROC)(GLuint index, const GLdouble *v); +GLAPI PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv; +#define glVertexAttrib4dv glad_glVertexAttrib4dv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GLAPI PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f; +#define glVertexAttrib4f glad_glVertexAttrib4f +typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat *v); +GLAPI PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv; +#define glVertexAttrib4fv glad_glVertexAttrib4fv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4IVPROC)(GLuint index, const GLint *v); +GLAPI PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv; +#define glVertexAttrib4iv glad_glVertexAttrib4iv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4SPROC)(GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +GLAPI PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s; +#define glVertexAttrib4s glad_glVertexAttrib4s +typedef void (APIENTRYP 
PFNGLVERTEXATTRIB4SVPROC)(GLuint index, const GLshort *v); +GLAPI PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv; +#define glVertexAttrib4sv glad_glVertexAttrib4sv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4UBVPROC)(GLuint index, const GLubyte *v); +GLAPI PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv; +#define glVertexAttrib4ubv glad_glVertexAttrib4ubv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4UIVPROC)(GLuint index, const GLuint *v); +GLAPI PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv; +#define glVertexAttrib4uiv glad_glVertexAttrib4uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIB4USVPROC)(GLuint index, const GLushort *v); +GLAPI PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv; +#define glVertexAttrib4usv glad_glVertexAttrib4usv +typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer); +GLAPI PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer; +#define glVertexAttribPointer glad_glVertexAttribPointer +#endif +#ifndef GL_VERSION_2_1 +#define GL_VERSION_2_1 1 +GLAPI int GLAD_GL_VERSION_2_1; +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv; +#define glUniformMatrix2x3fv glad_glUniformMatrix2x3fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv; +#define glUniformMatrix3x2fv glad_glUniformMatrix3x2fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX2X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv; +#define glUniformMatrix2x4fv glad_glUniformMatrix2x4fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI 
PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv; +#define glUniformMatrix4x2fv glad_glUniformMatrix4x2fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX3X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv; +#define glUniformMatrix3x4fv glad_glUniformMatrix3x4fv +typedef void (APIENTRYP PFNGLUNIFORMMATRIX4X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GLAPI PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv; +#define glUniformMatrix4x3fv glad_glUniformMatrix4x3fv +#endif +#ifndef GL_VERSION_3_0 +#define GL_VERSION_3_0 1 +GLAPI int GLAD_GL_VERSION_3_0; +typedef void (APIENTRYP PFNGLCOLORMASKIPROC)(GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a); +GLAPI PFNGLCOLORMASKIPROC glad_glColorMaski; +#define glColorMaski glad_glColorMaski +typedef void (APIENTRYP PFNGLGETBOOLEANI_VPROC)(GLenum target, GLuint index, GLboolean *data); +GLAPI PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v; +#define glGetBooleani_v glad_glGetBooleani_v +typedef void (APIENTRYP PFNGLGETINTEGERI_VPROC)(GLenum target, GLuint index, GLint *data); +GLAPI PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v; +#define glGetIntegeri_v glad_glGetIntegeri_v +typedef void (APIENTRYP PFNGLENABLEIPROC)(GLenum target, GLuint index); +GLAPI PFNGLENABLEIPROC glad_glEnablei; +#define glEnablei glad_glEnablei +typedef void (APIENTRYP PFNGLDISABLEIPROC)(GLenum target, GLuint index); +GLAPI PFNGLDISABLEIPROC glad_glDisablei; +#define glDisablei glad_glDisablei +typedef GLboolean (APIENTRYP PFNGLISENABLEDIPROC)(GLenum target, GLuint index); +GLAPI PFNGLISENABLEDIPROC glad_glIsEnabledi; +#define glIsEnabledi glad_glIsEnabledi +typedef void (APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKPROC)(GLenum primitiveMode); +GLAPI PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback; +#define glBeginTransformFeedback glad_glBeginTransformFeedback +typedef void (APIENTRYP 
PFNGLENDTRANSFORMFEEDBACKPROC)(void); +GLAPI PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback; +#define glEndTransformFeedback glad_glEndTransformFeedback +typedef void (APIENTRYP PFNGLBINDBUFFERRANGEPROC)(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +GLAPI PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange; +#define glBindBufferRange glad_glBindBufferRange +typedef void (APIENTRYP PFNGLBINDBUFFERBASEPROC)(GLenum target, GLuint index, GLuint buffer); +GLAPI PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase; +#define glBindBufferBase glad_glBindBufferBase +typedef void (APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSPROC)(GLuint program, GLsizei count, const GLchar *const*varyings, GLenum bufferMode); +GLAPI PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings; +#define glTransformFeedbackVaryings glad_glTransformFeedbackVaryings +typedef void (APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name); +GLAPI PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying; +#define glGetTransformFeedbackVarying glad_glGetTransformFeedbackVarying +typedef void (APIENTRYP PFNGLCLAMPCOLORPROC)(GLenum target, GLenum clamp); +GLAPI PFNGLCLAMPCOLORPROC glad_glClampColor; +#define glClampColor glad_glClampColor +typedef void (APIENTRYP PFNGLBEGINCONDITIONALRENDERPROC)(GLuint id, GLenum mode); +GLAPI PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender; +#define glBeginConditionalRender glad_glBeginConditionalRender +typedef void (APIENTRYP PFNGLENDCONDITIONALRENDERPROC)(void); +GLAPI PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender; +#define glEndConditionalRender glad_glEndConditionalRender +typedef void (APIENTRYP PFNGLVERTEXATTRIBIPOINTERPROC)(GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer); +GLAPI PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer; +#define 
glVertexAttribIPointer glad_glVertexAttribIPointer +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIIVPROC)(GLuint index, GLenum pname, GLint *params); +GLAPI PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv; +#define glGetVertexAttribIiv glad_glGetVertexAttribIiv +typedef void (APIENTRYP PFNGLGETVERTEXATTRIBIUIVPROC)(GLuint index, GLenum pname, GLuint *params); +GLAPI PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv; +#define glGetVertexAttribIuiv glad_glGetVertexAttribIuiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IPROC)(GLuint index, GLint x); +GLAPI PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i; +#define glVertexAttribI1i glad_glVertexAttribI1i +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IPROC)(GLuint index, GLint x, GLint y); +GLAPI PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i; +#define glVertexAttribI2i glad_glVertexAttribI2i +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IPROC)(GLuint index, GLint x, GLint y, GLint z); +GLAPI PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i; +#define glVertexAttribI3i glad_glVertexAttribI3i +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IPROC)(GLuint index, GLint x, GLint y, GLint z, GLint w); +GLAPI PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i; +#define glVertexAttribI4i glad_glVertexAttribI4i +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIPROC)(GLuint index, GLuint x); +GLAPI PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui; +#define glVertexAttribI1ui glad_glVertexAttribI1ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIPROC)(GLuint index, GLuint x, GLuint y); +GLAPI PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui; +#define glVertexAttribI2ui glad_glVertexAttribI2ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z); +GLAPI PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui; +#define glVertexAttribI3ui glad_glVertexAttribI3ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +GLAPI PFNGLVERTEXATTRIBI4UIPROC 
glad_glVertexAttribI4ui; +#define glVertexAttribI4ui glad_glVertexAttribI4ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1IVPROC)(GLuint index, const GLint *v); +GLAPI PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv; +#define glVertexAttribI1iv glad_glVertexAttribI1iv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2IVPROC)(GLuint index, const GLint *v); +GLAPI PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv; +#define glVertexAttribI2iv glad_glVertexAttribI2iv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3IVPROC)(GLuint index, const GLint *v); +GLAPI PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv; +#define glVertexAttribI3iv glad_glVertexAttribI3iv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4IVPROC)(GLuint index, const GLint *v); +GLAPI PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv; +#define glVertexAttribI4iv glad_glVertexAttribI4iv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI1UIVPROC)(GLuint index, const GLuint *v); +GLAPI PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv; +#define glVertexAttribI1uiv glad_glVertexAttribI1uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI2UIVPROC)(GLuint index, const GLuint *v); +GLAPI PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv; +#define glVertexAttribI2uiv glad_glVertexAttribI2uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI3UIVPROC)(GLuint index, const GLuint *v); +GLAPI PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv; +#define glVertexAttribI3uiv glad_glVertexAttribI3uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIVPROC)(GLuint index, const GLuint *v); +GLAPI PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv; +#define glVertexAttribI4uiv glad_glVertexAttribI4uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4BVPROC)(GLuint index, const GLbyte *v); +GLAPI PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv; +#define glVertexAttribI4bv glad_glVertexAttribI4bv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4SVPROC)(GLuint index, const GLshort *v); +GLAPI PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv; +#define 
glVertexAttribI4sv glad_glVertexAttribI4sv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UBVPROC)(GLuint index, const GLubyte *v); +GLAPI PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv; +#define glVertexAttribI4ubv glad_glVertexAttribI4ubv +typedef void (APIENTRYP PFNGLVERTEXATTRIBI4USVPROC)(GLuint index, const GLushort *v); +GLAPI PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv; +#define glVertexAttribI4usv glad_glVertexAttribI4usv +typedef void (APIENTRYP PFNGLGETUNIFORMUIVPROC)(GLuint program, GLint location, GLuint *params); +GLAPI PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv; +#define glGetUniformuiv glad_glGetUniformuiv +typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC)(GLuint program, GLuint color, const GLchar *name); +GLAPI PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation; +#define glBindFragDataLocation glad_glBindFragDataLocation +typedef GLint (APIENTRYP PFNGLGETFRAGDATALOCATIONPROC)(GLuint program, const GLchar *name); +GLAPI PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation; +#define glGetFragDataLocation glad_glGetFragDataLocation +typedef void (APIENTRYP PFNGLUNIFORM1UIPROC)(GLint location, GLuint v0); +GLAPI PFNGLUNIFORM1UIPROC glad_glUniform1ui; +#define glUniform1ui glad_glUniform1ui +typedef void (APIENTRYP PFNGLUNIFORM2UIPROC)(GLint location, GLuint v0, GLuint v1); +GLAPI PFNGLUNIFORM2UIPROC glad_glUniform2ui; +#define glUniform2ui glad_glUniform2ui +typedef void (APIENTRYP PFNGLUNIFORM3UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2); +GLAPI PFNGLUNIFORM3UIPROC glad_glUniform3ui; +#define glUniform3ui glad_glUniform3ui +typedef void (APIENTRYP PFNGLUNIFORM4UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GLAPI PFNGLUNIFORM4UIPROC glad_glUniform4ui; +#define glUniform4ui glad_glUniform4ui +typedef void (APIENTRYP PFNGLUNIFORM1UIVPROC)(GLint location, GLsizei count, const GLuint *value); +GLAPI PFNGLUNIFORM1UIVPROC glad_glUniform1uiv; +#define glUniform1uiv glad_glUniform1uiv +typedef void 
(APIENTRYP PFNGLUNIFORM2UIVPROC)(GLint location, GLsizei count, const GLuint *value); +GLAPI PFNGLUNIFORM2UIVPROC glad_glUniform2uiv; +#define glUniform2uiv glad_glUniform2uiv +typedef void (APIENTRYP PFNGLUNIFORM3UIVPROC)(GLint location, GLsizei count, const GLuint *value); +GLAPI PFNGLUNIFORM3UIVPROC glad_glUniform3uiv; +#define glUniform3uiv glad_glUniform3uiv +typedef void (APIENTRYP PFNGLUNIFORM4UIVPROC)(GLint location, GLsizei count, const GLuint *value); +GLAPI PFNGLUNIFORM4UIVPROC glad_glUniform4uiv; +#define glUniform4uiv glad_glUniform4uiv +typedef void (APIENTRYP PFNGLTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, const GLint *params); +GLAPI PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv; +#define glTexParameterIiv glad_glTexParameterIiv +typedef void (APIENTRYP PFNGLTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, const GLuint *params); +GLAPI PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv; +#define glTexParameterIuiv glad_glTexParameterIuiv +typedef void (APIENTRYP PFNGLGETTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, GLint *params); +GLAPI PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv; +#define glGetTexParameterIiv glad_glGetTexParameterIiv +typedef void (APIENTRYP PFNGLGETTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, GLuint *params); +GLAPI PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv; +#define glGetTexParameterIuiv glad_glGetTexParameterIuiv +typedef void (APIENTRYP PFNGLCLEARBUFFERIVPROC)(GLenum buffer, GLint drawbuffer, const GLint *value); +GLAPI PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv; +#define glClearBufferiv glad_glClearBufferiv +typedef void (APIENTRYP PFNGLCLEARBUFFERUIVPROC)(GLenum buffer, GLint drawbuffer, const GLuint *value); +GLAPI PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv; +#define glClearBufferuiv glad_glClearBufferuiv +typedef void (APIENTRYP PFNGLCLEARBUFFERFVPROC)(GLenum buffer, GLint drawbuffer, const GLfloat *value); +GLAPI PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv; +#define 
glClearBufferfv glad_glClearBufferfv +typedef void (APIENTRYP PFNGLCLEARBUFFERFIPROC)(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +GLAPI PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi; +#define glClearBufferfi glad_glClearBufferfi +typedef const GLubyte * (APIENTRYP PFNGLGETSTRINGIPROC)(GLenum name, GLuint index); +GLAPI PFNGLGETSTRINGIPROC glad_glGetStringi; +#define glGetStringi glad_glGetStringi +typedef GLboolean (APIENTRYP PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer); +GLAPI PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer; +#define glIsRenderbuffer glad_glIsRenderbuffer +typedef void (APIENTRYP PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer); +GLAPI PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer; +#define glBindRenderbuffer glad_glBindRenderbuffer +typedef void (APIENTRYP PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint *renderbuffers); +GLAPI PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers; +#define glDeleteRenderbuffers glad_glDeleteRenderbuffers +typedef void (APIENTRYP PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint *renderbuffers); +GLAPI PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers; +#define glGenRenderbuffers glad_glGenRenderbuffers +typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage; +#define glRenderbufferStorage glad_glRenderbufferStorage +typedef void (APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint *params); +GLAPI PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv; +#define glGetRenderbufferParameteriv glad_glGetRenderbufferParameteriv +typedef GLboolean (APIENTRYP PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer); +GLAPI PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer; +#define glIsFramebuffer glad_glIsFramebuffer +typedef void (APIENTRYP PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer); +GLAPI 
PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer; +#define glBindFramebuffer glad_glBindFramebuffer +typedef void (APIENTRYP PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint *framebuffers); +GLAPI PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers; +#define glDeleteFramebuffers glad_glDeleteFramebuffers +typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint *framebuffers); +GLAPI PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers; +#define glGenFramebuffers glad_glGenFramebuffers +typedef GLenum (APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target); +GLAPI PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus; +#define glCheckFramebufferStatus glad_glCheckFramebufferStatus +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GLAPI PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D; +#define glFramebufferTexture1D glad_glFramebufferTexture1D +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GLAPI PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D; +#define glFramebufferTexture2D glad_glFramebufferTexture2D +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +GLAPI PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D; +#define glFramebufferTexture3D glad_glFramebufferTexture3D +typedef void (APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GLAPI PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer; +#define glFramebufferRenderbuffer glad_glFramebufferRenderbuffer +typedef void (APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint *params); +GLAPI PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC 
glad_glGetFramebufferAttachmentParameteriv; +#define glGetFramebufferAttachmentParameteriv glad_glGetFramebufferAttachmentParameteriv +typedef void (APIENTRYP PFNGLGENERATEMIPMAPPROC)(GLenum target); +GLAPI PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap; +#define glGenerateMipmap glad_glGenerateMipmap +typedef void (APIENTRYP PFNGLBLITFRAMEBUFFERPROC)(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GLAPI PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer; +#define glBlitFramebuffer glad_glBlitFramebuffer +typedef void (APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GLAPI PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample; +#define glRenderbufferStorageMultisample glad_glRenderbufferStorageMultisample +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +GLAPI PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer; +#define glFramebufferTextureLayer glad_glFramebufferTextureLayer +typedef void * (APIENTRYP PFNGLMAPBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GLAPI PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange; +#define glMapBufferRange glad_glMapBufferRange +typedef void (APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length); +GLAPI PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange; +#define glFlushMappedBufferRange glad_glFlushMappedBufferRange +typedef void (APIENTRYP PFNGLBINDVERTEXARRAYPROC)(GLuint array); +GLAPI PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray; +#define glBindVertexArray glad_glBindVertexArray +typedef void (APIENTRYP PFNGLDELETEVERTEXARRAYSPROC)(GLsizei n, const GLuint *arrays); +GLAPI PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays; +#define 
glDeleteVertexArrays glad_glDeleteVertexArrays +typedef void (APIENTRYP PFNGLGENVERTEXARRAYSPROC)(GLsizei n, GLuint *arrays); +GLAPI PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays; +#define glGenVertexArrays glad_glGenVertexArrays +typedef GLboolean (APIENTRYP PFNGLISVERTEXARRAYPROC)(GLuint array); +GLAPI PFNGLISVERTEXARRAYPROC glad_glIsVertexArray; +#define glIsVertexArray glad_glIsVertexArray +#endif +#ifndef GL_VERSION_3_1 +#define GL_VERSION_3_1 1 +GLAPI int GLAD_GL_VERSION_3_1; +typedef void (APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC)(GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +GLAPI PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced; +#define glDrawArraysInstanced glad_glDrawArraysInstanced +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount); +GLAPI PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced; +#define glDrawElementsInstanced glad_glDrawElementsInstanced +typedef void (APIENTRYP PFNGLTEXBUFFERPROC)(GLenum target, GLenum internalformat, GLuint buffer); +GLAPI PFNGLTEXBUFFERPROC glad_glTexBuffer; +#define glTexBuffer glad_glTexBuffer +typedef void (APIENTRYP PFNGLPRIMITIVERESTARTINDEXPROC)(GLuint index); +GLAPI PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex; +#define glPrimitiveRestartIndex glad_glPrimitiveRestartIndex +typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC)(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +GLAPI PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData; +#define glCopyBufferSubData glad_glCopyBufferSubData +typedef void (APIENTRYP PFNGLGETUNIFORMINDICESPROC)(GLuint program, GLsizei uniformCount, const GLchar *const*uniformNames, GLuint *uniformIndices); +GLAPI PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices; +#define glGetUniformIndices glad_glGetUniformIndices +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMSIVPROC)(GLuint program, GLsizei 
uniformCount, const GLuint *uniformIndices, GLenum pname, GLint *params); +GLAPI PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv; +#define glGetActiveUniformsiv glad_glGetActiveUniformsiv +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMNAMEPROC)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformName); +GLAPI PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName; +#define glGetActiveUniformName glad_glGetActiveUniformName +typedef GLuint (APIENTRYP PFNGLGETUNIFORMBLOCKINDEXPROC)(GLuint program, const GLchar *uniformBlockName); +GLAPI PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex; +#define glGetUniformBlockIndex glad_glGetUniformBlockIndex +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKIVPROC)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +GLAPI PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv; +#define glGetActiveUniformBlockiv glad_glGetActiveUniformBlockiv +typedef void (APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName); +GLAPI PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName; +#define glGetActiveUniformBlockName glad_glGetActiveUniformBlockName +typedef void (APIENTRYP PFNGLUNIFORMBLOCKBINDINGPROC)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +GLAPI PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding; +#define glUniformBlockBinding glad_glUniformBlockBinding +#endif +#ifndef GL_VERSION_3_2 +#define GL_VERSION_3_2 1 +GLAPI int GLAD_GL_VERSION_3_2; +typedef void (APIENTRYP PFNGLDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices, GLint basevertex); +GLAPI PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex; +#define glDrawElementsBaseVertex glad_glDrawElementsBaseVertex +typedef void (APIENTRYP PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)(GLenum mode, GLuint start, GLuint end, 
GLsizei count, GLenum type, const void *indices, GLint basevertex); +GLAPI PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex; +#define glDrawRangeElementsBaseVertex glad_glDrawRangeElementsBaseVertex +typedef void (APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount, GLint basevertex); +GLAPI PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex; +#define glDrawElementsInstancedBaseVertex glad_glDrawElementsInstancedBaseVertex +typedef void (APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, const GLsizei *count, GLenum type, const void *const*indices, GLsizei drawcount, const GLint *basevertex); +GLAPI PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex; +#define glMultiDrawElementsBaseVertex glad_glMultiDrawElementsBaseVertex +typedef void (APIENTRYP PFNGLPROVOKINGVERTEXPROC)(GLenum mode); +GLAPI PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex; +#define glProvokingVertex glad_glProvokingVertex +typedef GLsync (APIENTRYP PFNGLFENCESYNCPROC)(GLenum condition, GLbitfield flags); +GLAPI PFNGLFENCESYNCPROC glad_glFenceSync; +#define glFenceSync glad_glFenceSync +typedef GLboolean (APIENTRYP PFNGLISSYNCPROC)(GLsync sync); +GLAPI PFNGLISSYNCPROC glad_glIsSync; +#define glIsSync glad_glIsSync +typedef void (APIENTRYP PFNGLDELETESYNCPROC)(GLsync sync); +GLAPI PFNGLDELETESYNCPROC glad_glDeleteSync; +#define glDeleteSync glad_glDeleteSync +typedef GLenum (APIENTRYP PFNGLCLIENTWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout); +GLAPI PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync; +#define glClientWaitSync glad_glClientWaitSync +typedef void (APIENTRYP PFNGLWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout); +GLAPI PFNGLWAITSYNCPROC glad_glWaitSync; +#define glWaitSync glad_glWaitSync +typedef void (APIENTRYP PFNGLGETINTEGER64VPROC)(GLenum pname, GLint64 *data); +GLAPI PFNGLGETINTEGER64VPROC 
glad_glGetInteger64v; +#define glGetInteger64v glad_glGetInteger64v +typedef void (APIENTRYP PFNGLGETSYNCIVPROC)(GLsync sync, GLenum pname, GLsizei count, GLsizei *length, GLint *values); +GLAPI PFNGLGETSYNCIVPROC glad_glGetSynciv; +#define glGetSynciv glad_glGetSynciv +typedef void (APIENTRYP PFNGLGETINTEGER64I_VPROC)(GLenum target, GLuint index, GLint64 *data); +GLAPI PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v; +#define glGetInteger64i_v glad_glGetInteger64i_v +typedef void (APIENTRYP PFNGLGETBUFFERPARAMETERI64VPROC)(GLenum target, GLenum pname, GLint64 *params); +GLAPI PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v; +#define glGetBufferParameteri64v glad_glGetBufferParameteri64v +typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level); +GLAPI PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture; +#define glFramebufferTexture glad_glFramebufferTexture +typedef void (APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +GLAPI PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample; +#define glTexImage2DMultisample glad_glTexImage2DMultisample +typedef void (APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations); +GLAPI PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample; +#define glTexImage3DMultisample glad_glTexImage3DMultisample +typedef void (APIENTRYP PFNGLGETMULTISAMPLEFVPROC)(GLenum pname, GLuint index, GLfloat *val); +GLAPI PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv; +#define glGetMultisamplefv glad_glGetMultisamplefv +typedef void (APIENTRYP PFNGLSAMPLEMASKIPROC)(GLuint maskNumber, GLbitfield mask); +GLAPI PFNGLSAMPLEMASKIPROC glad_glSampleMaski; +#define glSampleMaski glad_glSampleMaski +#endif +#ifndef GL_VERSION_3_3 +#define 
GL_VERSION_3_3 1 +GLAPI int GLAD_GL_VERSION_3_3; +typedef void (APIENTRYP PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)(GLuint program, GLuint colorNumber, GLuint index, const GLchar *name); +GLAPI PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed; +#define glBindFragDataLocationIndexed glad_glBindFragDataLocationIndexed +typedef GLint (APIENTRYP PFNGLGETFRAGDATAINDEXPROC)(GLuint program, const GLchar *name); +GLAPI PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex; +#define glGetFragDataIndex glad_glGetFragDataIndex +typedef void (APIENTRYP PFNGLGENSAMPLERSPROC)(GLsizei count, GLuint *samplers); +GLAPI PFNGLGENSAMPLERSPROC glad_glGenSamplers; +#define glGenSamplers glad_glGenSamplers +typedef void (APIENTRYP PFNGLDELETESAMPLERSPROC)(GLsizei count, const GLuint *samplers); +GLAPI PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers; +#define glDeleteSamplers glad_glDeleteSamplers +typedef GLboolean (APIENTRYP PFNGLISSAMPLERPROC)(GLuint sampler); +GLAPI PFNGLISSAMPLERPROC glad_glIsSampler; +#define glIsSampler glad_glIsSampler +typedef void (APIENTRYP PFNGLBINDSAMPLERPROC)(GLuint unit, GLuint sampler); +GLAPI PFNGLBINDSAMPLERPROC glad_glBindSampler; +#define glBindSampler glad_glBindSampler +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIPROC)(GLuint sampler, GLenum pname, GLint param); +GLAPI PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri; +#define glSamplerParameteri glad_glSamplerParameteri +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, const GLint *param); +GLAPI PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv; +#define glSamplerParameteriv glad_glSamplerParameteriv +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERFPROC)(GLuint sampler, GLenum pname, GLfloat param); +GLAPI PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf; +#define glSamplerParameterf glad_glSamplerParameterf +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, const GLfloat *param); +GLAPI 
PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv; +#define glSamplerParameterfv glad_glSamplerParameterfv +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, const GLint *param); +GLAPI PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv; +#define glSamplerParameterIiv glad_glSamplerParameterIiv +typedef void (APIENTRYP PFNGLSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, const GLuint *param); +GLAPI PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv; +#define glSamplerParameterIuiv glad_glSamplerParameterIuiv +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, GLint *params); +GLAPI PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv; +#define glGetSamplerParameteriv glad_glGetSamplerParameteriv +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, GLint *params); +GLAPI PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv; +#define glGetSamplerParameterIiv glad_glGetSamplerParameterIiv +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, GLfloat *params); +GLAPI PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv; +#define glGetSamplerParameterfv glad_glGetSamplerParameterfv +typedef void (APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, GLuint *params); +GLAPI PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv; +#define glGetSamplerParameterIuiv glad_glGetSamplerParameterIuiv +typedef void (APIENTRYP PFNGLQUERYCOUNTERPROC)(GLuint id, GLenum target); +GLAPI PFNGLQUERYCOUNTERPROC glad_glQueryCounter; +#define glQueryCounter glad_glQueryCounter +typedef void (APIENTRYP PFNGLGETQUERYOBJECTI64VPROC)(GLuint id, GLenum pname, GLint64 *params); +GLAPI PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v; +#define glGetQueryObjecti64v glad_glGetQueryObjecti64v +typedef void (APIENTRYP PFNGLGETQUERYOBJECTUI64VPROC)(GLuint id, GLenum pname, GLuint64 
*params); +GLAPI PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v; +#define glGetQueryObjectui64v glad_glGetQueryObjectui64v +typedef void (APIENTRYP PFNGLVERTEXATTRIBDIVISORPROC)(GLuint index, GLuint divisor); +GLAPI PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor; +#define glVertexAttribDivisor glad_glVertexAttribDivisor +typedef void (APIENTRYP PFNGLVERTEXATTRIBP1UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui; +#define glVertexAttribP1ui glad_glVertexAttribP1ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBP1UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +GLAPI PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv; +#define glVertexAttribP1uiv glad_glVertexAttribP1uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBP2UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui; +#define glVertexAttribP2ui glad_glVertexAttribP2ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBP2UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +GLAPI PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv; +#define glVertexAttribP2uiv glad_glVertexAttribP2uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBP3UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui; +#define glVertexAttribP3ui glad_glVertexAttribP3ui +typedef void (APIENTRYP PFNGLVERTEXATTRIBP3UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +GLAPI PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv; +#define glVertexAttribP3uiv glad_glVertexAttribP3uiv +typedef void (APIENTRYP PFNGLVERTEXATTRIBP4UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value); +GLAPI PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui; +#define glVertexAttribP4ui glad_glVertexAttribP4ui +typedef void (APIENTRYP 
PFNGLVERTEXATTRIBP4UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint *value); +GLAPI PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv; +#define glVertexAttribP4uiv glad_glVertexAttribP4uiv +typedef void (APIENTRYP PFNGLVERTEXP2UIPROC)(GLenum type, GLuint value); +GLAPI PFNGLVERTEXP2UIPROC glad_glVertexP2ui; +#define glVertexP2ui glad_glVertexP2ui +typedef void (APIENTRYP PFNGLVERTEXP2UIVPROC)(GLenum type, const GLuint *value); +GLAPI PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv; +#define glVertexP2uiv glad_glVertexP2uiv +typedef void (APIENTRYP PFNGLVERTEXP3UIPROC)(GLenum type, GLuint value); +GLAPI PFNGLVERTEXP3UIPROC glad_glVertexP3ui; +#define glVertexP3ui glad_glVertexP3ui +typedef void (APIENTRYP PFNGLVERTEXP3UIVPROC)(GLenum type, const GLuint *value); +GLAPI PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv; +#define glVertexP3uiv glad_glVertexP3uiv +typedef void (APIENTRYP PFNGLVERTEXP4UIPROC)(GLenum type, GLuint value); +GLAPI PFNGLVERTEXP4UIPROC glad_glVertexP4ui; +#define glVertexP4ui glad_glVertexP4ui +typedef void (APIENTRYP PFNGLVERTEXP4UIVPROC)(GLenum type, const GLuint *value); +GLAPI PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv; +#define glVertexP4uiv glad_glVertexP4uiv +typedef void (APIENTRYP PFNGLTEXCOORDP1UIPROC)(GLenum type, GLuint coords); +GLAPI PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui; +#define glTexCoordP1ui glad_glTexCoordP1ui +typedef void (APIENTRYP PFNGLTEXCOORDP1UIVPROC)(GLenum type, const GLuint *coords); +GLAPI PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv; +#define glTexCoordP1uiv glad_glTexCoordP1uiv +typedef void (APIENTRYP PFNGLTEXCOORDP2UIPROC)(GLenum type, GLuint coords); +GLAPI PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui; +#define glTexCoordP2ui glad_glTexCoordP2ui +typedef void (APIENTRYP PFNGLTEXCOORDP2UIVPROC)(GLenum type, const GLuint *coords); +GLAPI PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv; +#define glTexCoordP2uiv glad_glTexCoordP2uiv +typedef void (APIENTRYP PFNGLTEXCOORDP3UIPROC)(GLenum type, GLuint coords); 
+GLAPI PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui; +#define glTexCoordP3ui glad_glTexCoordP3ui +typedef void (APIENTRYP PFNGLTEXCOORDP3UIVPROC)(GLenum type, const GLuint *coords); +GLAPI PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv; +#define glTexCoordP3uiv glad_glTexCoordP3uiv +typedef void (APIENTRYP PFNGLTEXCOORDP4UIPROC)(GLenum type, GLuint coords); +GLAPI PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui; +#define glTexCoordP4ui glad_glTexCoordP4ui +typedef void (APIENTRYP PFNGLTEXCOORDP4UIVPROC)(GLenum type, const GLuint *coords); +GLAPI PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv; +#define glTexCoordP4uiv glad_glTexCoordP4uiv +typedef void (APIENTRYP PFNGLMULTITEXCOORDP1UIPROC)(GLenum texture, GLenum type, GLuint coords); +GLAPI PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui; +#define glMultiTexCoordP1ui glad_glMultiTexCoordP1ui +typedef void (APIENTRYP PFNGLMULTITEXCOORDP1UIVPROC)(GLenum texture, GLenum type, const GLuint *coords); +GLAPI PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv; +#define glMultiTexCoordP1uiv glad_glMultiTexCoordP1uiv +typedef void (APIENTRYP PFNGLMULTITEXCOORDP2UIPROC)(GLenum texture, GLenum type, GLuint coords); +GLAPI PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui; +#define glMultiTexCoordP2ui glad_glMultiTexCoordP2ui +typedef void (APIENTRYP PFNGLMULTITEXCOORDP2UIVPROC)(GLenum texture, GLenum type, const GLuint *coords); +GLAPI PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv; +#define glMultiTexCoordP2uiv glad_glMultiTexCoordP2uiv +typedef void (APIENTRYP PFNGLMULTITEXCOORDP3UIPROC)(GLenum texture, GLenum type, GLuint coords); +GLAPI PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui; +#define glMultiTexCoordP3ui glad_glMultiTexCoordP3ui +typedef void (APIENTRYP PFNGLMULTITEXCOORDP3UIVPROC)(GLenum texture, GLenum type, const GLuint *coords); +GLAPI PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv; +#define glMultiTexCoordP3uiv glad_glMultiTexCoordP3uiv +typedef void (APIENTRYP PFNGLMULTITEXCOORDP4UIPROC)(GLenum 
texture, GLenum type, GLuint coords); +GLAPI PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui; +#define glMultiTexCoordP4ui glad_glMultiTexCoordP4ui +typedef void (APIENTRYP PFNGLMULTITEXCOORDP4UIVPROC)(GLenum texture, GLenum type, const GLuint *coords); +GLAPI PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv; +#define glMultiTexCoordP4uiv glad_glMultiTexCoordP4uiv +typedef void (APIENTRYP PFNGLNORMALP3UIPROC)(GLenum type, GLuint coords); +GLAPI PFNGLNORMALP3UIPROC glad_glNormalP3ui; +#define glNormalP3ui glad_glNormalP3ui +typedef void (APIENTRYP PFNGLNORMALP3UIVPROC)(GLenum type, const GLuint *coords); +GLAPI PFNGLNORMALP3UIVPROC glad_glNormalP3uiv; +#define glNormalP3uiv glad_glNormalP3uiv +typedef void (APIENTRYP PFNGLCOLORP3UIPROC)(GLenum type, GLuint color); +GLAPI PFNGLCOLORP3UIPROC glad_glColorP3ui; +#define glColorP3ui glad_glColorP3ui +typedef void (APIENTRYP PFNGLCOLORP3UIVPROC)(GLenum type, const GLuint *color); +GLAPI PFNGLCOLORP3UIVPROC glad_glColorP3uiv; +#define glColorP3uiv glad_glColorP3uiv +typedef void (APIENTRYP PFNGLCOLORP4UIPROC)(GLenum type, GLuint color); +GLAPI PFNGLCOLORP4UIPROC glad_glColorP4ui; +#define glColorP4ui glad_glColorP4ui +typedef void (APIENTRYP PFNGLCOLORP4UIVPROC)(GLenum type, const GLuint *color); +GLAPI PFNGLCOLORP4UIVPROC glad_glColorP4uiv; +#define glColorP4uiv glad_glColorP4uiv +typedef void (APIENTRYP PFNGLSECONDARYCOLORP3UIPROC)(GLenum type, GLuint color); +GLAPI PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui; +#define glSecondaryColorP3ui glad_glSecondaryColorP3ui +typedef void (APIENTRYP PFNGLSECONDARYCOLORP3UIVPROC)(GLenum type, const GLuint *color); +GLAPI PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv; +#define glSecondaryColorP3uiv glad_glSecondaryColorP3uiv +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/pufferlib/ocean/constellation/point_particle.fs b/pufferlib/ocean/constellation/point_particle.fs new file mode 100644 index 000000000..f73386f62 --- /dev/null +++ 
b/pufferlib/ocean/constellation/point_particle.fs @@ -0,0 +1,27 @@ +#version 330 + +in vec4 fragColor; +out vec4 finalColor; + +void main() +{ +vec2 uv = gl_PointCoord - vec2(0.5); // center to edge +float dist = length(uv); // distance from center (0.0–0.707) + +// Optional: discard hard edge for exact circular point +if (dist > 0.5) + discard; + +// Smooth exponential falloff +float glow = exp(-24.0 * dist * dist); // steeper falloff = tighter glow + +// Bright, saturated core color +vec3 color = fragColor.rgb * glow * 5.0; + +// Only output color if visible — remove black halo +if (glow < 0.01) + discard; + +finalColor = vec4(color, 1.0); + +} diff --git a/pufferlib/ocean/constellation/point_particle.vs b/pufferlib/ocean/constellation/point_particle.vs new file mode 100644 index 000000000..2f0a38e4a --- /dev/null +++ b/pufferlib/ocean/constellation/point_particle.vs @@ -0,0 +1,31 @@ +#version 330 + +// Input vertex attributes +in vec3 vertexPosition; +in vec4 vertexColor; + +// Input uniform values +uniform mat4 mvp; +uniform float currentTime; + +// Output to fragment shader +out vec4 fragColor; + +// NOTE: Add your custom variables here + +void main() +{ + // Unpack data from vertexPosition + vec2 pos = vertexPosition.xy; + float period = vertexPosition.z; + + // Calculate final vertex position (jiggle it around a bit horizontally) + //pos += vec2(100, 0)*sin(period*currentTime); + gl_Position = mvp*vec4(pos, 0.0, 1.0); + + // Calculate the screen space size of this particle (also vary it over time) + //gl_PointSize = 10 - 5*abs(sin(period*currentTime)); + gl_PointSize = 10.0; + + fragColor = vertexColor; +} diff --git a/pufferlib/ocean/constellation/star_shader.fs b/pufferlib/ocean/constellation/star_shader.fs new file mode 100644 index 000000000..e4c8d215f --- /dev/null +++ b/pufferlib/ocean/constellation/star_shader.fs @@ -0,0 +1,11 @@ +#version 330 + +in vec4 fragColor; +out vec4 finalColor; + +void main() { + // Optional: Circular points + vec2 coord = 
gl_PointCoord - vec2(0.5); + if (length(coord) > 0.5) discard; + finalColor = fragColor; +} diff --git a/pufferlib/ocean/constellation/star_shader.vs b/pufferlib/ocean/constellation/star_shader.vs new file mode 100644 index 000000000..c31bd2506 --- /dev/null +++ b/pufferlib/ocean/constellation/star_shader.vs @@ -0,0 +1,20 @@ +#version 330 + +in vec2 position; // Screen-space position +in vec4 color; // RGBA color +out vec4 fragColor; + +uniform float screenWidth; +uniform float screenHeight; +uniform float pointSize; + +void main() { + // Convert screen-space to NDC + vec2 ndc = vec2( + position.x / screenWidth * 2.0 - 1.0, + 1.0 - position.y / screenHeight * 2.0 + ); + gl_Position = vec4(ndc, 0.0, 1.0); + fragColor = color; + gl_PointSize = pointSize; +} From 80d81273d52b8811e0ed974cc3a57ca8cf159ef5 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Oct 2025 00:22:41 +0000 Subject: [PATCH 060/188] UI --- pufferlib/ocean/constellation/constellation.c | 342 ++++++++++-------- 1 file changed, 186 insertions(+), 156 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 5f1ae743b..1113549cf 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -10,6 +10,7 @@ #define RAYGUI_IMPLEMENTATION #include "raygui.h" +#include "rcamera.h" #if defined(PLATFORM_DESKTOP) || defined(PLATFORM_DESKTOP_SDL) #if defined(GRAPHICS_API_OPENGL_ES2) @@ -35,6 +36,40 @@ #include "rlgl.h" #include "raymath.h" +#define CAMERA_ORBITAL_SPEED 0.1f +#define CAMERA_MOUSE_MOVE_SENSITIVITY 0.005f +#define CAMERA_MOVE_SPEED 5.4f +#define CAMERA_ROTATION_SPEED 0.03f +#define CAMERA_PAN_SPEED 0.2f + +// Camera mouse movement sensitivity +#define CAMERA_MOUSE_MOVE_SENSITIVITY 0.003f +void CustomUpdateCamera(Camera *camera, int mode) +{ + Vector2 mousePositionDelta = GetMouseDelta(); + + bool moveInWorldPlane = ((mode == CAMERA_FIRST_PERSON) || (mode == 
CAMERA_THIRD_PERSON)); + bool rotateAroundTarget = ((mode == CAMERA_THIRD_PERSON) || (mode == CAMERA_ORBITAL)); + bool lockView = ((mode == CAMERA_FREE) || (mode == CAMERA_FIRST_PERSON) || (mode == CAMERA_THIRD_PERSON) || (mode == CAMERA_ORBITAL)); + bool rotateUp = false; + + // Camera speeds based on frame time + float cameraMoveSpeed = CAMERA_MOVE_SPEED*GetFrameTime(); + float cameraRotationSpeed = CAMERA_ROTATION_SPEED*GetFrameTime(); + float cameraPanSpeed = CAMERA_PAN_SPEED*GetFrameTime(); + float cameraOrbitalSpeed = CAMERA_ORBITAL_SPEED*GetFrameTime(); + + // Orbital can just orbit + Matrix rotation = MatrixRotate(GetCameraUp(camera), cameraOrbitalSpeed); + Vector3 view = Vector3Subtract(camera->position, camera->target); + view = Vector3Transform(view, rotation); + camera->position = Vector3Add(camera->target, view); + // Zoom target distance + CameraMoveToTarget(camera, -GetMouseWheelMove()); + if (IsKeyPressed(KEY_KP_SUBTRACT)) CameraMoveToTarget(camera, 2.0f); + if (IsKeyPressed(KEY_KP_ADD)) CameraMoveToTarget(camera, -2.0f); +} + const Color PUFF_RED = (Color){187, 0, 0, 255}; const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; @@ -53,7 +88,7 @@ const float EMPTY = -4242.0f; typedef struct Particle { float x; float y; - float period; + float i; float r; float g; float b; @@ -105,12 +140,22 @@ Hyper* get_hyper(Dataset *data, char *env, char* hyper) { } // TODO: Slow as fuck +/* Color rgb(float h) { float r = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f, 6.f) - 3.f) - 1.f)); float g = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f + 4.f, 6.f) - 3.f) - 1.f)); float b = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f + 2.f, 6.f) - 3.f) - 1.f)); + //return (Color){255.f, 255.f, 255.f, 255}; return (Color){r * 255.f + .5f, g * 255.f + .5f, b * 255.f + .5f, 255}; } +*/ + +Color rgb(float h) { + //return ColorFromHSV(180, h, 1.0); + h = 120.0f * (1.0 + h); + //return ColorFromHSV(h, 1.0, 1.0); + return ColorFromHSV(h, 
0.8f, 0.15f); +} typedef struct PlotArgs { float x_min; @@ -127,8 +172,10 @@ typedef struct PlotArgs { int legend_font_size; int line_width; int tick_length; - int x_margin; - int y_margin; + int top_margin; + int bottom_margin; + int left_margin; + int right_margin; int tick_margin; Color font_color; Color background_color; @@ -156,8 +203,10 @@ PlotArgs DEFAULT_PLOT_ARGS = { .line_width = 2, .tick_length = 8, .tick_margin = 8, - .x_margin = 100, - .y_margin = 70, + .top_margin = 70, + .bottom_margin = 70, + .left_margin = 100, + .right_margin = 100, .font_color = PUFF_WHITE, .background_color = PUFF_BACKGROUND, .axis_color = PUFF_WHITE, @@ -199,23 +248,21 @@ const char* format_tick_label(double value) { } void draw_axes(PlotArgs args) { - int width = args.width; - int height = args.height; - - // Draw axes - DrawLine(args.x_margin, args.y_margin, - args.x_margin, height - args.y_margin, PUFF_WHITE); - DrawLine(args.x_margin, height - args.y_margin, - width - args.x_margin, height - args.y_margin, PUFF_WHITE); + DrawLine(args.left_margin, args.top_margin, + args.left_margin, args.height - args.bottom_margin, PUFF_WHITE); + DrawLine(args.left_margin, args.height - args.bottom_margin, + args.width - args.right_margin, args.height - args.bottom_margin, PUFF_WHITE); +} +void draw_labels(PlotArgs args) { // X label Vector2 x_font_size = MeasureTextEx(args.font, args.x_label, args.axis_font_size, 0); DrawTextEx( args.font, args.x_label, (Vector2){ - width/2 - x_font_size.x/2, - height - x_font_size.y, + args.width/2 - x_font_size.x/2, + args.height - x_font_size.y, }, args.axis_font_size, 0, @@ -229,7 +276,7 @@ void draw_axes(PlotArgs args) { args.y_label, (Vector2){ 0, - height/2 + y_font_size.x/2 + args.height/2 + y_font_size.x/2 }, (Vector2){ 0, 0 }, -90, @@ -237,22 +284,31 @@ void draw_axes(PlotArgs args) { 0, PUFF_WHITE ); +} + + +void draw_ticks(PlotArgs args) { + int width = args.width; + int height = args.height; + + float plot_width = width - args.left_margin - 
args.right_margin; + float plot_height = height - args.top_margin - args.bottom_margin; // Autofit number of ticks Vector2 tick_label_size = MeasureTextEx(args.font, "estimate", args.axis_font_size, 0); - int num_x_ticks = 1 + (width - 2*args.x_margin)/tick_label_size.x; - int num_y_ticks = 1 + (height - 2*args.y_margin)/tick_label_size.y; + int num_x_ticks = 1 + plot_width/tick_label_size.x; + int num_y_ticks = 1 + plot_height/tick_label_size.y; // X ticks for (int i=0; iary[i]) : y->ary[i]; // Map to screen coordinates with margins - xi = args.x_margin + (xi - x_min) / dx * (width - 2 * args.x_margin); - yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2 * args.y_margin); + xi = args.left_margin + (xi - x_min) / dx * plot_width; + yi = args.height - args.bottom_margin - (yi - y_min) / dy * plot_height; particles[i].x = xi; particles[i].y = yi; - particles[i].period = 10.0f; + particles[i].i = i; Color c = rgb(cmap[i]); particles[i].r = c.r/255.0f; particles[i].g = c.g/255.0f; @@ -624,33 +658,6 @@ void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs ar VertexBuffer buffer = {&particles, MAX_PARTICLES}; plot_gl(shader, buffer); } -/* -void plot(Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { - assert(x->n == y->n); - - int width = args.width; - int height = args.height; - float x_min = args.x_min; - float x_max = args.x_max; - float y_min = args.y_min; - float y_max = args.y_max; - - float dx = x_max - x_min; - float dy = y_max - y_min; - - for (int i=0; in; i++) { - if (filter != NULL && !filter[i]) { - continue; - } - float xi = log_x ? log10(x->ary[i]) : x->ary[i]; - float yi = log_y ? 
log10(y->ary[i]) : y->ary[i]; - xi = args.x_margin + (xi - x_min) / dx * (width - 2*args.x_margin); - yi = (height - args.y_margin) - (yi - y_min) / dy * (height - 2*args.y_margin); - Color c = rgb(cmap[i]); - DrawCircle(xi, yi, args.line_width, c); - } -} -*/ void plot3(Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { @@ -710,7 +717,7 @@ void plot3(Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, particles[i].x = screen_pos.x; particles[i].y = screen_pos.y; - particles[i].period = 10.0f; + particles[i].i = i; c = rgb(cmap[i]); particles[i].r = c.r/255.0f; particles[i].g = c.g/255.0f; @@ -947,7 +954,7 @@ int main(void) { DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 16, NULL, 255); Camera3D camera = (Camera3D){ 0 }; - camera.position = (Vector3){ 2.0f, 2.0f, 2.0f }; + camera.position = (Vector3){ 1.5f, 1.25f, 1.5f }; camera.target = (Vector3){ 0.5f, 0.5f, 0.5f }; camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; camera.fovy = 45.0f; @@ -972,6 +979,8 @@ int main(void) { PlotArgs args2 = DEFAULT_PLOT_ARGS; RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); //SetTextureFilter(fig2.texture, TEXTURE_FILTER_POINT); + args2.left_margin = 50; + args2.right_margin = 50; int fig2_env_idx = 1; bool fig2_env_active = false; bool fig2_x_active = false; @@ -986,6 +995,10 @@ int main(void) { PlotArgs args3 = DEFAULT_PLOT_ARGS; RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); + args3.left_margin = 10; + args3.right_margin = 10; + args3.top_margin = 10; + args3.bottom_margin = 10; bool fig3_range1_active = false; int fig3_range1_idx = 2; char fig3_range1_min[32]; @@ -1035,6 +1048,7 @@ int main(void) { focus = GetMousePosition(); } + // Figure 1 x_label = hyper_key[fig1_x_idx]; y_label = hyper_key[fig1_y_idx]; z_label = hyper_key[fig1_z_idx]; @@ -1081,32 +1095,15 @@ int main(void) { cmap[j] = 
i/(float)data.n; } } - //rlSetBlendMode(RL_BLEND_ADDITIVE); //BeginShaderMode(shader); plot3(camera, shader, x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); //EndShaderMode(); - //rlSetBlendMode(RL_BLEND_ALPHA); } BeginMode3D(camera); - UpdateCamera(&camera, CAMERA_ORBITAL); - - + CustomUpdateCamera(&camera, CAMERA_ORBITAL); draw_axes3(args1, fig1_x_log, fig1_y_log, fig1_z_log); EndMode3D(); EndTextureMode(); - DrawTextureRec( - fig1.texture, - (Rectangle){0, 0, fig1.texture.width, -fig1.texture.height }, - (Vector2){ 0, SETTINGS_HEIGHT }, WHITE - ); - Rectangle fig1_env_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig1_env_rect, env_options, &fig1_env_idx, fig1_env_active)){ - fig1_env_active = !fig1_env_active; - } - GuiDropdownCheckbox(DROPDOWN_WIDTH, 0, options, &fig1_x_idx, &fig1_x_active, "Log X", &fig1_x_log); - GuiDropdownCheckbox(2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig1_y_idx, &fig1_y_active, "Log Y", &fig1_y_log); - GuiDropdownCheckbox(3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, options, &fig1_z_idx, &fig1_z_active, "Log Z", &fig1_z_log); - GuiDropdownCheckbox(4*DROPDOWN_WIDTH + 3*TOGGLE_WIDTH, 0, env_hyper_options, &fig1_color_idx, &fig1_color_active, "Log Color", &fig1_log_color); // Figure 2 @@ -1114,6 +1111,7 @@ int main(void) { y_label = hyper_key[fig2_y_idx]; args2.x_label = x_label; args2.y_label = y_label; + args2.top_margin = 20; BeginTextureMode(fig2); ClearBackground(PUFF_BACKGROUND); @@ -1162,20 +1160,8 @@ int main(void) { //rlSetBlendMode(RL_BLEND_ALPHA); draw_axes(args2); + draw_ticks(args2); EndTextureMode(); - DrawTextureRec( - fig2.texture, - (Rectangle){ 0, 0, fig2.texture.width, -fig2.texture.height }, - (Vector2){ fig1.texture.width, SETTINGS_HEIGHT }, WHITE - ); - Rectangle fig2_env_rect = {fig1.texture.width, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; - if (GuiDropdownBox(fig2_env_rect, env_options, &fig2_env_idx, fig2_env_active)){ - fig2_env_active = !fig2_env_active; - } - 
GuiDropdownCheckbox(fig1.texture.width + DROPDOWN_WIDTH, 0, options, &fig2_x_idx, &fig2_x_active, "Log X", &fig2_x_log); - GuiDropdownCheckbox(fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig2_y_idx, &fig2_y_active, "Log Y", &fig2_y_log); - GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &fig2_log_color); - // Figure 3 args3.x_label = "tsne1"; @@ -1203,22 +1189,16 @@ int main(void) { apply_filter(filter, filter_param_2, fig3_range2_min_val, fig3_range2_max_val); plot(shader, x, y, false, false, args3, cmap, filter); } - draw_axes(args3); + //draw_axes(args3); EndTextureMode(); - DrawTextureRec( - fig3.texture, - (Rectangle){ 0, 0, fig3.texture.width, -fig3.texture.height }, - (Vector2){ 0, SETTINGS_HEIGHT + fig1.texture.height }, WHITE - ); - GuiDropdownFilter(0, fig1.texture.height, options, &fig3_range1_idx, &fig3_range1_active, focus, - fig3_range1_min, &fig3_range1_min_val, fig3_range1_max, &fig3_range1_max_val); - GuiDropdownFilter(2*DROPDOWN_WIDTH, fig1.texture.height, options, &fig3_range2_idx, &fig3_range2_active, focus, - fig3_range2_min, &fig3_range2_min_val, fig3_range2_max, &fig3_range2_max_val); // Figure 4 args4.x_label = "Value"; args4.y_label = "Hyperparameter"; - args4.x_margin = 200; + args4.left_margin = 170; + args4.right_margin = 50; + args4.top_margin = 10; + args4.bottom_margin = 50; args4.x_min = 1e-8; args4.x_max = 1e8; BeginTextureMode(fig4); @@ -1240,18 +1220,68 @@ int main(void) { } } EndBlendMode(); - draw_box_axes(hyper_key, hyper_count, args4); + draw_axes(args4); + draw_box_ticks(hyper_key, hyper_count, args4); EndTextureMode(); + + + // Figure 1-4 + DrawTextureRec( + fig1.texture, + (Rectangle){0, 0, fig1.texture.width, -fig1.texture.height }, + (Vector2){ 0, SETTINGS_HEIGHT }, WHITE + ); + DrawTextureRec( + fig2.texture, + (Rectangle){ 0, 0, fig2.texture.width, -fig2.texture.height }, + (Vector2){ 
fig1.texture.width, 2*SETTINGS_HEIGHT }, WHITE + ); + DrawTextureRec( + fig3.texture, + (Rectangle){ 0, 0, fig3.texture.width, -fig3.texture.height }, + (Vector2){ 0, 2*SETTINGS_HEIGHT + fig1.texture.height }, WHITE + ); DrawTextureRec( fig4.texture, (Rectangle){ 0, 0, fig4.texture.width, -fig4.texture.height }, (Vector2){ fig1.texture.width, fig1.texture.height + 2*SETTINGS_HEIGHT }, WHITE ); - GuiDropdownFilter(fig1.texture.width, fig1.texture.height + SETTINGS_HEIGHT, options, &fig4_range1_idx, &fig4_range1_active, focus, + + + // Figure 3 UI + GuiDropdownFilter(0, SETTINGS_HEIGHT, options, &fig3_range1_idx, &fig3_range1_active, focus, + fig3_range1_min, &fig3_range1_min_val, fig3_range1_max, &fig3_range1_max_val); + GuiDropdownFilter(2*DROPDOWN_WIDTH, SETTINGS_HEIGHT, options, &fig3_range2_idx, &fig3_range2_active, focus, + fig3_range2_min, &fig3_range2_min_val, fig3_range2_max, &fig3_range2_max_val); + + + // Figure 4 UI + GuiDropdownFilter(fig1.texture.width, SETTINGS_HEIGHT, options, &fig4_range1_idx, &fig4_range1_active, focus, fig4_range1_min, &fig4_range1_min_val, fig4_range1_max, &fig4_range1_max_val); - GuiDropdownFilter(fig1.texture.width + 2*DROPDOWN_WIDTH, fig1.texture.height + SETTINGS_HEIGHT, options, &fig4_range2_idx, &fig4_range2_active, focus, + GuiDropdownFilter(fig1.texture.width + 2*DROPDOWN_WIDTH, SETTINGS_HEIGHT, options, &fig4_range2_idx, &fig4_range2_active, focus, fig4_range2_min, &fig4_range2_min_val, fig4_range2_max, &fig4_range2_max_val); + + // Figure 1 UI + Rectangle fig1_env_rect = {0, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig1_env_rect, env_options, &fig1_env_idx, fig1_env_active)){ + fig1_env_active = !fig1_env_active; + } + GuiDropdownCheckbox(DROPDOWN_WIDTH, 0, options, &fig1_x_idx, &fig1_x_active, "Log X", &fig1_x_log); + GuiDropdownCheckbox(2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig1_y_idx, &fig1_y_active, "Log Y", &fig1_y_log); + GuiDropdownCheckbox(3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, options, 
&fig1_z_idx, &fig1_z_active, "Log Z", &fig1_z_log); + GuiDropdownCheckbox(4*DROPDOWN_WIDTH + 3*TOGGLE_WIDTH, 0, env_hyper_options, &fig1_color_idx, &fig1_color_active, "Log Color", &fig1_log_color); + + // Figure 2 UI + Rectangle fig2_env_rect = {fig1.texture.width, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; + if (GuiDropdownBox(fig2_env_rect, env_options, &fig2_env_idx, fig2_env_active)){ + fig2_env_active = !fig2_env_active; + } + GuiDropdownCheckbox(fig1.texture.width + DROPDOWN_WIDTH, 0, options, &fig2_x_idx, &fig2_x_active, "Log X", &fig2_x_log); + GuiDropdownCheckbox(fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig2_y_idx, &fig2_y_active, "Log Y", &fig2_y_log); + GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &fig2_log_color); + + DrawFPS(GetScreenWidth() - 95, 10); EndDrawing(); } From ce29f538e7f362983eaa0c5f6d027256d64fbf17 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Oct 2025 00:59:59 +0000 Subject: [PATCH 061/188] UI --- pufferlib/ocean/constellation/constellation.c | 13 ++++++++----- pufferlib/ocean/constellation/puffer.rgs | Bin 128 -> 8961 bytes 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 1113549cf..65771f33c 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -103,7 +103,7 @@ typedef struct VertexBuffer { #define SEP 4 #define SETTINGS_HEIGHT 20 #define TOGGLE_WIDTH 60 -#define DROPDOWN_WIDTH 120 +#define DROPDOWN_WIDTH 136 #define BUCKETS 8 typedef struct { @@ -938,7 +938,13 @@ int main(void) { // Initialize Raylib InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); + + DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 32, NULL, 255); + 
DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 16, NULL, 255); + Font gui_font = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 14, NULL, 255); + GuiLoadStyle("pufferlib/ocean/constellation/puffer.rgs"); + GuiSetFont(gui_font); ClearBackground(PUFF_BACKGROUND); SetTargetFPS(60); @@ -950,9 +956,6 @@ int main(void) { glEnable(GL_PROGRAM_POINT_SIZE); #endif - DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 32, NULL, 255); - DEFAULT_PLOT_ARGS.font_small = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 16, NULL, 255); - Camera3D camera = (Camera3D){ 0 }; camera.position = (Vector3){ 1.5f, 1.25f, 1.5f }; camera.target = (Vector3){ 0.5f, 0.5f, 0.5f }; @@ -1282,7 +1285,7 @@ int main(void) { GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &fig2_log_color); - DrawFPS(GetScreenWidth() - 95, 10); + //DrawFPS(GetScreenWidth() - 95, 10); EndDrawing(); } diff --git a/pufferlib/ocean/constellation/puffer.rgs b/pufferlib/ocean/constellation/puffer.rgs index 4429468bd0a1806254668d2300541b0f044fd620..95a05b0e158d7f122dbbc6b4c894ef12a2cb1572 100644 GIT binary patch literal 8961 zcmc(^Wl$Sl&^MYwi?zk2c#+~-+(L^MclY8@G`JR8pm=c)#i6)66e$kDU5jfVKxhad zxBut4U+&DEc|X28vwP0jv-8_Mb7W`scq;0={{932AO!p`p(DL`0e~j}w4nk90Du9w z`bPi&Cg6YdPXXwrthWHbGXVNOH2@2MK3(Ah0I&h*4C{Ym8~_@P{@3FEzxH?lv=sk8 zoaX?vg(k&+Uhx0N4KV;c{VNOrAOWB`8UJZW0c8JVDktE7<}Lom=06c4`@j5I8UXl@ zF#xIb|J48D{g3|v1J0!!Gi~L)SrNbzdv~RWf*IZ#glVD2dt58sLVl`s!mkBSqD2L! 
z2Ts3HKYfA!@hO?YQ;#Qtzv3|P8e#_QYzVcQlEUt&{=Q598uV6x{w9z>JRh?)A9QHe z8|d6#mfksE-Er4#cXG3|7To$%Jx|>GBv;2s&SdxvGdD7Q5uQ6ws^)t4Df@HB4aQ1(?JoIH8a_7%?GucOGsT@9=9KH*|F z0@8cc=jf=>9IJWdkke!KmY(Q2v`^ToZ1kgq=u4Rj{PsapLGFd`k0PW;V%*(*N75== z>J{RZedZtq?lKUe24hdJR$zQiHx*E=!C zqs+SWOPJUFe#sDh71;ZI0tLt#kskj(68DiP*+5Gnm;oIM^oO@Gmmyc-_C>UWcuYW2 zQT%Ir-agMveoz_FxxsVM2nV0WdrGO$iV0}JFX}Uir;;sSauJIAu zf}%VH%}IzmaI_go4@zCJZX@oa9J;^1c|Fw4dBs`Ey1Nun9Qr(Fec>A0L~qZ(w5Ek& zLdQHt5SAy&fXbKErBx1IW|*h@a1Ap5AzGxUT4~|Di1k&7?`Lu^7O|A~{gBV558JdK z`Hsl8_{h6)3&!SquaGFJ04U+!a^8G&d5cIkqZQ|&5pWP}S zq(6tImvT3MrBLwwm-($QnXm-rj3{HrRL@(!0Oe_7+ecOV#dpcE_lP>o z+IZt&OK{>4p-g<&!b~IQCFI=?iGkSdPCQ2r*^xiKTAmYytx?+Sx1m}X*1_GZ$0t@p z*%a37aNfPj4bAWD1&q>q*&kgzvgha^PhMy+)4Z*&$|{(TdKM$AGE3}z3M~rn8NH<- z{QJ~P+-32B)>BE0l-cS*U|QLYHdw}L0s}}_(860miVm%=QnjDZbVkapimC1`>MPOH zJKS79Y4Eu>Mp3KOMvWrt%;s9^4*EC>60N50+msq(Mu|ZqOHTd=k=mmxHk_Dspy4Vl_ zE^;Ag5!7VQqi_dnheQV}g0UBx1~kLs9eW6HG3`%T`9Ho&<&@^cF7d+GP`v)wemn_9%WI9?Ban2bDt=lfmFVABs}{kPEW} zn=!~oChKE_VLXwOIo;zW7K1St3%b6km+>TZj{Vp-F7wM#x@6RrQ2G3>_{xDbRlx)3 zPW>Ak=SDqSeyJah#FPYc#<>>dPE!o|hCMFG+-$JyERYogXMm(l90$#DN2#`B)C(3m@i2XUiFQnOJCGTD*Qs{`-JS!p}U0W+;XwtO6hzJcB z%>m&?VQc3a=P!~ZXF7dA081BHo<#8sT9e@zPEMl-iXnzSpE?L?D`R;Djpk_J1O+Qn zo#SZDhw=HZk}zviPu|h>lMSl}lipsvb37pAF^+U#flVlcK_zDJGARzAg+^NS<7a% z!jbNfx4Q?-j*ao2eEvNVuw|Mg)-K4Ii(X{wQ-27vN`}2X4ynISw_M8mLg|ne6 zzC2b@==)zh9qWX-dc}QB#ciaps?6)^>k7$Cl5#5TPnQ!%vsw(gr&p@XN$+AaL8~|Gq$Q-nMD6F53Kv_ z&=Ju}qHtVZNh$jWN|%?<g$=KUUjNfMp)F?SPC?B?tl;uFLh!)Q_{myUxDwH!VOzOw=Be--I zV~ydR=`6WEz0`W*qxq13z?*82-cNp3sf*{Q*7*wcjba_)y|j4smuJTZdkC7tBc9)$ zIDaOHnj4rHzIz>i?roHxkV3Rk>YIvDg4pSted=(ye)oQXevIINgHEDci@_TxciUzY{ zvP>(-R5M9!U#oLn_;Uc?f?|g~j9;yc9^o6any5qyN@xlAcsa9hs&xhR75%H0l+raDzYWB>Bk=s7f@&D}d)03h&<=54#;LZ)kWxuY3=a znyEC`3gQ>fZ08rVwcnnPz&Q7a3u+xw4ACl}5pG3J!^H6%=ji}2yo&Z1)L)z_AYr9o zOxE#D<3JO#{1Eu{nxnq1h0W6&=WFb0bP%4VMOFma>v&bVik^O*V&2v_Z)Ez6Z8WSe zPF~ZB1N^bD`X>wt7YVq6#iL!Arp|j_Zz(+wPN1BbjJ9hbYF<%a#Vec~dpKezCnfI<4VoX#5_8M*l!%iVWl>J8ks@k#O{%njgWlKXH(#fh 
zYWnXuk5hOWY8jgfBTZvnJSroOkRUvuz>!fbXd?V(bN93TqUTgJw~b2*Q3&7)^&(?M z%j?^sbpP1Y4@AKUJHzD2?ECx)9C5Gb80qzxrAKkZ-GJ+F(wL+7vEJPYf}tMBL6SKY zJ6VEE8kaYz&U41$U&(2$;&Ga!5*+BL4tgC6h@2Fc-NGN(gC4!u4__2uo6H27klZ(u zip8IgG2F3L|ClFfu6-s3qVYXa`Mo);I}on|eD47+5Gbz9dj8f|ZryD; z>2G%;Y*aqiV5QM^9XqVSRcVE+m2I=^`^RNHhq4-fmyVv>nXxoW996nOXM!TAow!?z zQGuXQS40K*dOUN{PD?Ea3aE*7{xj<>L9ZHs8U1!p#g+73Hg5g z{$w}5C^Vw{S$Dj+?66RAcI-%?O>F_@N4rt;LVE1%)q`UydUxnvxDC6FPLD>3dCU{K zP9Q8_!{dJJk>RZ9Ws?+nd;YoWcKd3EV8u;i!t{Y~X3~9{pBh7q>)yS$qN9`s0>sU9=vmH@`KdZ7I-Jdr z1NL*x#AUpmpD7!4wlVT7$eFph|8Np_H-GO zHd96%cz}2zTIb~;&GcrHU%|rQe@=68i%O}jOW~f(`sW- zZD}IsQtTGp-fOTc_G zR0}85o=?!21gv{_ztYko6j3tt47%QV53`ElaOC`h(7PVI<8NkfFzwx_Ki6$3Gw-!GEz?{X+>lGyAume-N>CzB)bP z@Y{6Wq0O!bR~f|N4;VnP@5`knaT=W3k7bXppgFvm+&ka6wZB0iuq-P^Ic3&{kpAUhd;AbYX8m18v!Ma z4XkO^hT_>am5(!6M4>8kji)kz$nqOBX)o{pL?_J$)*NFU@=-+~CPV zORdJ^240y&P3H$+nnNKA@IxVa`RrpFR#z zE!(7R=>9G2E+ok0KlG#lH8jYv+$#gw>YP(nqRj;oJ6UGwGt>7%}1#l$Gj%I^WX zMk~O19J3IHW*E_cK7?xJXw>}X=c&0-WlsXC*)P=gW!p?h&6xkG8<#sTP)o-)SRAKe zN<{Zd;mj7mKbdvIL)I)Wkj`3T2@$-eXzC3lkQt;iYmb8HIO-XH6 zF4%|j2;f%YBkFB=i1DuTGuvJ*38AS-i8Ns!>=u^A!}bo0lk>Dws?Y|95}W#r>)lam zi>mgh!A3YFc2nO-=O=b=)jH&MHQA`lSNL=HzR^BcA6Be~Bq-_f*9_CI>qhH$)PJC2 z%?zqHr-zR(n|s(D51)-f28x9zA$66Yi32YBxy#Aj z(AF)hrH2uh?cK=7aXa~`PS(#kW*+?XIjnuRzuh4>c~n%p*Lsj3sRiBcl-xtVMbKt#rcHdx&GLSK=d>*D^N#mO|6+u#v zy|i1IwicBH36FZ$>r7PZXGh#2>KUBo;kF>4Ee|?mJS0V_Gg>+j6tW6z68trPDN5noQMP}^(nWa2T>~RBW|eIjk~>&Ohe_1#SN07K{P9#Z zchEAK16#9@iCY#zGG&80z#e06PA9v=Dof6GzV_$)d38t3KfB-=(xY&!-JVS|O-2c- zn5(@4&D$5hchVGxY@hZUs{tz+{|cwhM8kZo>xA7}#!JA{e!Dn+4+Gi4cf{cOO!dx< zoz{UT-w(+$G@gZwW|OymmKYz?HDQ=NxewQ-_ZHoh4{Sw}1wBf~UzzvhB?mJeaC?O| zu=Aoug_@bbQ)^G{Bzn~*qV2Xh_-==m9TTE-8(e%rGW=cGB;#6ZW)^32=9PO|T>_2p zhVyI$%h<^Dna(2idof2RtfJAI={;~i=F1knbov_18Hf|w1EINVX^Q9|VtZ$B)J~jnz~B_dYVLfw zuVTBZoC4hOLYP5YU2eIZkgdC^}^rRI`in?T)G7-f|pkAXaI=~IiAYaPi4sbWO zKFoz1Bqd^OS9Ano4kvsI(b)hWpNI14R$vwzdo5I3dpUU zN0p?ybzA`#NS@xj z)>}F6!~#B>w4*0-b8RbFZ1FZ8+hbfrZR+6O(F@U-p&G`{ODa8c+Y57VJvY@0$16LX 
z0y8AT&={@)%|Wgy>lD=SH%_ z*=bzEgf?wsZ&^Wsb*6G7#tbI4e(LU}7?0uv{guk|Op5DzqqrX~2FGKeE%Ewk_4lq{ zg4#1?7mudyN?wa)a!(M z$A9EX&8}aJQ~Ur6@QBySH=C4ed@b5hHk1TzpQSLJk&g-*hn*MF=Nk>d;gzCrv90Tj zPysmi>i~z7=%;Jjmyaeoe9_Pf4xyi`IUWjSH>!ZJ>EHUuf;SV&LlZjl8$R!~=IV77Mo z#S3um<8VKsw>4|EP_ngAv3+o_A&&&Oq}|KY_GIGEn$as5|dgC8U>E!U^paLb9pgZXc^I7WcYJ#9+k8dLPv7I&pAv z&!0hSnDZu!+@+y5Ek1Dl`KLpNtmP?d>H^eVRpav7*7!a9^l`M_8+dfW(ZR`3bK;i` zI?cS9^abzdIu}(klPXrJRKsA)T`?YPqY$19v6noY>mcQ@JUVHDY*Up442t9;o?kv? zXWUgBMe~harDcKp0_=KjS+0F_LvIKw(oPP)WN8RnxCymFXOB`Am=;2C(zage-4+tW z0RnwnSlWf4iqmI^$hV7TJr*sw_(qToJ@0uZU)Y6$N}Rd&&j$%-!>~sEA-e+6{NXh9 zD`NI6Uy*}OfTC`W?W@4hgSCYz3bKtO7>d93<5C=<^b+{x1X+W3l2c*BUh|!R4V1W> zB1YqVYE+7I3UuG?kB`57Y{jPGCQG2@%-_7&sTW7SwRbdM9)ky{qPkTU-tBo#EL>%4 z9#UrLhSYuFSObnZJFKK&`FqfgI~V>s*1z36!>I)I*)}<%eq8H!IjUYkASu#PExOVp z!%BvMp=u9V^`<-rR!Ea4ZeQtf5Z{JDxx92ab6Zza2hXf!narjt1ZY1Viofl@S?sb_ zBt(<1{W35RU$C(r?l!&>KVcm8rW6iI0MqglPnmY(EJjvQGh8>2X4`mm))K#rQBN|J zC`}EPSMxg;Q9wyyFDi>==ERcO9=@A>(dV7_hxiWQR$Cpl+uf92z8hkS;RR>tSZ52DB$uf8z3un6|S-g4s23l$ZA^~wbYt@OQ+s>&3W2@bGr$! za`9~^4Q%Rt83TJ<<6c7}eWPX~cV6wNu2^+tXXoEoK5>nWEy@Tdp+6r zwCXf)-G(Rn&PnIRX!LfO33X^&Lhko5sd%TQtEjFlbR`00;MgQU{$ zS8P8m=6K-FHwRX?`1igy*)LkV-cQPnXHM&+S7%)Pxq=U^w5r-H)#L^0JhZPO1fXn_0(;|g;zQxS+>G-yhYIHVYeLqmT=k;&u z!!3UarE8Bv=#OX5^dfLYm$Can4DMT~O`d*#|bB z_f1**!Wf`kk1P0V`b*ko7ju#l2TNJLvj<@pHX3lpd|BTP&@u$HzU$?N%CKzg1Oi{1 za8z!mXzF8j_6$85%+S^F8RLTKH0|#0OO4AlgU}s8LF7qI8K)j)Jf5X5brG9j;31a&_Z{_rtvgpG)21sZ(D_SiunPqsh z@ABrVru1=c=B89N1nW#^q>-72v!vi8h*{5S_d~e?d~Yh!gEF5HlJMd$&pnJtBWfZ0 zT)PE+ZcV9f;QF)T;u+6w@6zOpz%`vvA4v`4wrNEOchxSeekNe!bjz)^gZ&52M9`!y zMBpv&;4Kp^MT%J~hp&GRK%sT637NU-&xIv)E4%OSm``xr6>aZdMOG`^fiT(#8>Uo4 zGZsZY(6G2dA*9?%Fd`<4+PL5R?q9yp(fl;Rj73TB2C%x4DOK1h8Sn0;%LsM$I?Tly zr)SyS1(%WMpE zk)LWUJ)&C_>cSXOfm9JJS2bNZn{u2{wSDF=BH4SuhqHc|s+-CaJUNSD>Ddl&?{0XH z_l-ZVtpC|`mB_tD&cDBTv(#Q?%=d&t*hE}UmEN$ikw1CHt^G9mV|^#npguzjU8a6? 
zw>;f8UaYQuMRiTr>@4Fq0!yO37|F1Rw4hNg35nmy$o-s9q^+l%b*nfuq5M*gH{|ye zQM2Ssf7z>CAH(1&S|a~K#^RvEXyp_!oBa9vaA-S75d}V^#4(_MMM|- zSoWvD&GVyQ$3S}sn`ByunUw=J_?s9s)a7r&wLg7a&y6^9q;&R<#@UiBP@?u^FP*1PWWFpI&2eTE%30!T zWAP`HaGfpqn5Z%!!68;h(&>VOR)B_mC|Er{#PNo@h$0t)c6b%+ysJ@tVr6mieUrd! z=U?%0^-RSkckEJ?--$~V2rm&v5+ipzrKYYE|0RqWWbPfkdp5nKp!kDB9DV%kj$%;= z6wHT9p}+ZbfShQC2L5Ra%20_%~TA#TMNL=!CVM7P7 zFByWK(hsm+vLAr{9rB*fygQtKsd~cPb|n7Ng?y&pVf{-PGSj%$Te}Q#6P1RRqrXe+ zX*8kx9}x;D0+_{K=mZ%@rJiWbAS1(Fltym2*O_EMu6Pu Ucz(%ODdHjv;<^|+(N<*t4^aXRB>(^b delta 87 zcmZp4Yhbh~at~IRz{tSB#{dEU+e-8q7#JD;N4PODFfcKE{C^cF!OQ>=XJPoiWu_4W f11rOS28RC=ZH$D382(>4(8$0b% Date: Wed, 15 Oct 2025 13:27:36 +0000 Subject: [PATCH 062/188] shaders --- .../ocean/constellation/point_particle.fs | 24 +++++++++---------- .../ocean/constellation/point_particle.vs | 19 +++++++++++---- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pufferlib/ocean/constellation/point_particle.fs b/pufferlib/ocean/constellation/point_particle.fs index f73386f62..9a184dafc 100644 --- a/pufferlib/ocean/constellation/point_particle.fs +++ b/pufferlib/ocean/constellation/point_particle.fs @@ -3,25 +3,23 @@ in vec4 fragColor; out vec4 finalColor; + void main() { -vec2 uv = gl_PointCoord - vec2(0.5); // center to edge -float dist = length(uv); // distance from center (0.0–0.707) - -// Optional: discard hard edge for exact circular point -if (dist > 0.5) - discard; -// Smooth exponential falloff -float glow = exp(-24.0 * dist * dist); // steeper falloff = tighter glow +vec2 uv = gl_PointCoord - vec2(0.5); +float dist = length(uv); // distance from center of point -// Bright, saturated core color -vec3 color = fragColor.rgb * glow * 5.0; +// Soft radial falloff (tightens with higher number) +float falloff = exp(-20.0 * dist * dist); -// Only output color if visible — remove black halo -if (glow < 0.01) +// Kill pixels too dark to be visible — avoids black ring +if (falloff < 0.01) discard; -finalColor = vec4(color, 1.0); +// Final 
color, scaled by falloff +vec3 color = fragColor.rgb * falloff * 5.0; +finalColor = vec4(color, falloff); + } diff --git a/pufferlib/ocean/constellation/point_particle.vs b/pufferlib/ocean/constellation/point_particle.vs index 2f0a38e4a..e7df6dc28 100644 --- a/pufferlib/ocean/constellation/point_particle.vs +++ b/pufferlib/ocean/constellation/point_particle.vs @@ -13,19 +13,30 @@ out vec4 fragColor; // NOTE: Add your custom variables here +float twinkle(float idx, float t) { + float base_size = 10.0; + float phase = mod(idx * 137.5, 360.0); + float frequency = 2.5 + mod(idx, 3.0) * 1.0; + float amplitude = (mod(idx, 10.0) == 0.0) ? 10.0 : 2.0; + float size_variation = amplitude * sin(frequency * t + radians(phase)); + return max(0.5, base_size + size_variation); +} + void main() { // Unpack data from vertexPosition - vec2 pos = vertexPosition.xy; - float period = vertexPosition.z; + vec2 pos = vertexPosition.xy; + float idx = vertexPosition.z; // Calculate final vertex position (jiggle it around a bit horizontally) //pos += vec2(100, 0)*sin(period*currentTime); gl_Position = mvp*vec4(pos, 0.0, 1.0); // Calculate the screen space size of this particle (also vary it over time) - //gl_PointSize = 10 - 5*abs(sin(period*currentTime)); - gl_PointSize = 10.0; + //gl_PointSize = 10 - 5*abs(sin(0.1*idx*currentTime)); + //gl_PointSize = 10.0; + + gl_PointSize = twinkle(idx, currentTime); fragColor = vertexColor; } From aa4f90f5590ae6596ea724f874909765bb95281a Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Oct 2025 20:18:37 +0000 Subject: [PATCH 063/188] constellations! 
--- pufferlib/ocean/constellation/constellation.c | 224 +++++++++++++++++- 1 file changed, 221 insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 65771f33c..784e728e8 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -42,6 +42,7 @@ #define CAMERA_ROTATION_SPEED 0.03f #define CAMERA_PAN_SPEED 0.2f + // Camera mouse movement sensitivity #define CAMERA_MOUSE_MOVE_SENSITIVITY 0.003f void CustomUpdateCamera(Camera *camera, int mode) @@ -74,6 +75,8 @@ const Color PUFF_RED = (Color){187, 0, 0, 255}; const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; +const Color TRANSPARENT = (Color){0, 0, 0, 0}; +const Color CONSTELLATION = (Color){255, 255, 255, 128}; Color COLORS[] = { BLUE, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, @@ -731,7 +734,6 @@ void plot3(Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, VertexBuffer buffer = {&particles, idx}; plot_gl(shader, buffer); - } @@ -937,6 +939,7 @@ int main(void) { } // Initialize Raylib + SetConfigFlags(FLAG_MSAA_4X_HINT); InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); DEFAULT_PLOT_ARGS.font = LoadFontEx("resources/shared/JetBrainsMono-SemiBold.ttf", 32, NULL, 255); @@ -951,6 +954,10 @@ int main(void) { Shader shader = LoadShader(TextFormat("pufferlib/ocean/constellation/point_particle.vs", GLSL_VERSION), TextFormat("pufferlib/ocean/constellation/point_particle.fs", GLSL_VERSION)); + Shader blur_shader = LoadShader( + "pufferlib/ocean/constellation/blur.vs", + "pufferlib/ocean/constellation/blur.fs"); + // Allows the vertex shader to set the point size of each particle individually #ifndef GRAPHICS_API_OPENGL_ES2 glEnable(GL_PROGRAM_POINT_SIZE); @@ -964,6 +971,7 @@ int main(void) 
{ camera.projection = CAMERA_PERSPECTIVE; PlotArgs args1 = DEFAULT_PLOT_ARGS; RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); + RenderTexture2D fig1_overlay = LoadRenderTexture(args1.width, args1.height); int fig1_env_idx = 0; bool fig1_env_active = false; bool fig1_x_active = false; @@ -998,6 +1006,7 @@ int main(void) { PlotArgs args3 = DEFAULT_PLOT_ARGS; RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); + RenderTexture2D fig3_overlay = LoadRenderTexture(args1.width, args1.height); args3.left_margin = 10; args3.right_margin = 10; args3.top_margin = 10; @@ -1031,6 +1040,18 @@ int main(void) { float fig4_range2_min_val = 0; float fig4_range2_max_val = 10000; + float perf_thresholds[4] = {0.5f, 0.75f, 0.9f, 0.95f}; + int best_srci[4]; + int best_n[4]; + int* best_idx[4]; + float* temp_dist[4]; + int* temp_idx[4]; + for (int i=0; i<4; i++) { + best_idx[i] = calloc(data.n, sizeof(int)); + temp_dist[i] = calloc(data.n, sizeof(float)); + temp_idx[i] = calloc(data.n, sizeof(int)); + } + Hyper* x; Hyper* y; Hyper* z; @@ -1102,13 +1123,88 @@ int main(void) { plot3(camera, shader, x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); //EndShaderMode(); } + + // Find best hypers + float tsne_thresh = 100.0f; + memset(best_n, 0, sizeof(int)*4); + memset(best_srci, 0, sizeof(int)*4); + for (int env_i=0; env_ikey, "environment/perf"); + Hyper* src_tsne1 = get_hyper(&data, src->key, "tsne1"); + Hyper* src_tsne2 = get_hyper(&data, src->key, "tsne2"); + for (int i=0; in; i++) { + float perfi = src_perf->ary[i]; + if (perfi < perf_thresholds[0]) { + continue; + } + float t1i = src_tsne1->ary[i]; + float t2i = src_tsne2->ary[i]; + for (int ki=0; ki<4; ki++) { + for (int kj=0; kjkey, "environment/perf"); + Hyper* dst_tsne1 = get_hyper(&data, dst->key, "tsne1"); + Hyper* dst_tsne2 = get_hyper(&data, dst->key, "tsne2"); + for (int j=0; jn; j++) { + float perfj = dst_perf->ary[j]; + if (perfj < perf_thresholds[0]) { + continue; + } + 
float t1j = dst_tsne1->ary[j]; + float t2j = dst_tsne2->ary[j]; + float t1_dist = t1i - t1j; + float t2_dist = t2i - t2j; + float tsne_dist = t1_dist*t1_dist + t2_dist*t2_dist; + if (tsne_dist > tsne_thresh) { + continue; + } + for (int thresh_idx=0; thresh_idx<4; thresh_idx++) { + float perf_thresh = perf_thresholds[thresh_idx]; + if (perfi < perf_thresh || perfj < perf_thresh) { + break; + } + if (temp_idx[thresh_idx][env_j] == -1 || tsne_dist < temp_dist[thresh_idx][env_j]) { + temp_idx[thresh_idx][env_j] = j; + temp_dist[thresh_idx][env_j] = tsne_dist; + } + } + } + } + + for (int ki=0; ki<4; ki++) { + int temp_n = 0; + for (int kj=0; kj best_n[ki]) { + best_n[ki] = temp_n; + best_srci[ki] = env_i; + for (int kj=0; kjary[src_idx]; + float yi = y->ary[src_idx]; + float zi = z->ary[src_idx]; + + xi = (fig1_x_log) ? signed_log10(xi) : xi; + yi = (fig1_y_log) ? signed_log10(yi) : yi; + zi = (fig1_z_log) ? signed_log10(zi) : zi; + + Vector3 src_point = (Vector3){ + (xi - x_min)/(x_max - x_min), + (yi - y_min)/(y_max - y_min), + (zi - z_min)/(z_max - z_min) + }; + + Vector2 screen_i = GetWorldToScreenEx(src_point, camera, 960, 520); + + for (int i=0; iary[bdi]; + float yj = y->ary[bdi]; + float zj = z->ary[bdi]; + + xj = (fig1_x_log) ? signed_log10(xj) : xj; + yj = (fig1_y_log) ? signed_log10(yj) : yj; + zj = (fig1_z_log) ? 
signed_log10(zj) : zj; + + Vector3 dst_point = (Vector3){ + (xj - x_min)/(x_max - x_min), + (yj - y_min)/(y_max - y_min), + (zj - z_min)/(z_max - z_min) + }; + Vector2 screen_j = GetWorldToScreenEx(dst_point, camera, 960, 520); + DrawLineEx( + (Vector2){screen_i.x, screen_i.y}, + (Vector2){screen_j.x, screen_j.y}, + 2, + CONSTELLATION + ); + } + } + + + // Figure 3 Overlay + float offset = fig1.texture.height + 2*SETTINGS_HEIGHT; + for (int k=0; k<4; k++) { + int bsi = best_srci[k]; + char* src_env = data.envs[bsi].key; + int src_idx = best_idx[k][bsi]; + x = get_hyper(&data, src_env, "tsne1"); + y = get_hyper(&data, src_env, "tsne2"); + float xi = x->ary[src_idx]; + float yi = y->ary[src_idx]; + + xi = args3.left_margin + args3.width*(xi - args3.x_min)/(args3.x_max - args3.x_min); + yi = offset + args3.height - args3.bottom_margin - args3.height*(yi - args3.y_min)/(args3.y_max - args3.y_min); + + for (int i=0; iary[bdi]; + float yj = y->ary[bdi]; + + xj = args3.left_margin + args3.width*(xj - args3.x_min)/(args3.x_max - args3.x_min); + yj = offset + args3.height - args3.bottom_margin - args3.height*(yj - args3.y_min)/(args3.y_max - args3.y_min); + + DrawLineEx( + (Vector2){xi, yi}, + (Vector2){xj, yj}, + 2, + CONSTELLATION + ); + } + + float tsne_thresh_px = sqrt(tsne_thresh)*args3.width/(args3.x_max - args3.x_min); + //DrawCircleLines(xi, yi, tsne_thresh_px, CONSTELLATION); + } + // Figure 3 UI GuiDropdownFilter(0, SETTINGS_HEIGHT, options, &fig3_range1_idx, &fig3_range1_active, focus, From c746d1a6d4dee9e27c4a886afaad34df61f77021 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Oct 2025 23:25:09 +0000 Subject: [PATCH 064/188] minor refactor --- pufferlib/ocean/constellation/constellation.c | 58 ++----------------- 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 784e728e8..df69e1a9f 100644 --- a/pufferlib/ocean/constellation/constellation.c 
+++ b/pufferlib/ocean/constellation/constellation.c @@ -36,7 +36,7 @@ #include "rlgl.h" #include "raymath.h" -#define CAMERA_ORBITAL_SPEED 0.1f +#define CAMERA_ORBITAL_SPEED 0.025f #define CAMERA_MOUSE_MOVE_SENSITIVITY 0.005f #define CAMERA_MOVE_SPEED 5.4f #define CAMERA_ROTATION_SPEED 0.03f @@ -71,6 +71,10 @@ void CustomUpdateCamera(Camera *camera, int mode) if (IsKeyPressed(KEY_KP_ADD)) CameraMoveToTarget(camera, -2.0f); } +#define SETTINGS_HEIGHT 20 +#define TOGGLE_WIDTH 60 +#define DROPDOWN_WIDTH 136 + const Color PUFF_RED = (Color){187, 0, 0, 255}; const Color PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; @@ -78,12 +82,6 @@ const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; const Color TRANSPARENT = (Color){0, 0, 0, 0}; const Color CONSTELLATION = (Color){255, 255, 255, 128}; -Color COLORS[] = { - BLUE, MAROON, ORANGE, DARKGREEN, DARKBLUE, DARKPURPLE, DARKBROWN, - GRAY, RED, GOLD, LIME, VIOLET, LIGHTGRAY, PINK, YELLOW, - GREEN, SKYBLUE, PURPLE, BEIGE -}; - const float EMPTY = -4242.0f; #define MAX_PARTICLES 1000 @@ -103,12 +101,6 @@ typedef struct VertexBuffer { int n; } VertexBuffer; -#define SEP 4 -#define SETTINGS_HEIGHT 20 -#define TOGGLE_WIDTH 60 -#define DROPDOWN_WIDTH 136 -#define BUCKETS 8 - typedef struct { char *key; float *ary; @@ -142,22 +134,8 @@ Hyper* get_hyper(Dataset *data, char *env, char* hyper) { return NULL; } -// TODO: Slow as fuck -/* -Color rgb(float h) { - float r = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f, 6.f) - 3.f) - 1.f)); - float g = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f + 4.f, 6.f) - 3.f) - 1.f)); - float b = fmaxf(0.f, fminf(1.f, fabsf(fmodf(h * 6.f + 2.f, 6.f) - 3.f) - 1.f)); - //return (Color){255.f, 255.f, 255.f, 255}; - return (Color){r * 255.f + .5f, g * 255.f + .5f, b * 255.f + .5f, 255}; -} -*/ - Color rgb(float h) { - //return ColorFromHSV(180, h, 1.0); - h = 120.0f * (1.0 + h); - //return ColorFromHSV(h, 1.0, 1.0); - return ColorFromHSV(h, 0.8f, 0.15f); + return 
ColorFromHSV(120*(1.0 + h), 0.8f, 0.15f); } typedef struct PlotArgs { @@ -546,12 +524,6 @@ void boxplot(Hyper* hyper, bool log_x, int i, int hyper_count, PlotArgs args, Co DrawRectangle(left, args.top_margin + i*dy, right - left, dy, faded); } -// Struct for vertex data (screen-space position and color) -typedef struct { - Vector2 pos; // Screen-space x, y - Color color; // RGBA color -} PlotVertex; - void plot_gl(Shader shader, VertexBuffer vertices) { Particle* particles = vertices.vertices; int n = vertices.n; @@ -576,25 +548,14 @@ void plot_gl(Shader shader, VertexBuffer vertices) { rlSetBlendMode(RL_BLEND_ADDITIVE); int currentTimeLoc = GetShaderLocation(shader, "currentTime"); - //float time = GetTime(); - //SetShaderValue(shader, currentTimeLoc, &time, SHADER_UNIFORM_FLOAT); - // Switch to plain OpenGL - //------------------------------------------------------------------------------ glUseProgram(shader.id); - glUniform1f(currentTimeLoc, GetTime()); - - // Get the current modelview and projection matrix so the particle system is displayed and transformed Matrix modelViewProjection = MatrixMultiply(rlGetMatrixModelview(), rlGetMatrixProjection()); - glUniformMatrix4fv(shader.locs[SHADER_LOC_MATRIX_MVP], 1, false, MatrixToFloat(modelViewProjection)); - glBindVertexArray(vao); glDrawArrays(GL_POINTS, 0, n); glBindVertexArray(0); - glUseProgram(0); - //------------------------------------------------------------------------------ glDeleteBuffers(1, &vbo); glDeleteVertexArrays(1, &vao); rlSetBlendMode(RL_BLEND_ALPHA); @@ -610,10 +571,6 @@ void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs ar float plot_height = height - args.top_margin - args.bottom_margin; // Compute ranges and apply log scaling if needed - //float x_min = log_x ? log10f(args.x_min) : args.x_min; - //float x_max = log_x ? log10f(args.x_max) : args.x_max; - //float y_min = log_y ? log10f(args.y_min) : args.y_min; - //float y_max = log_y ? 
log10f(args.y_max) : args.y_max; float x_min = args.x_min; float x_max = args.x_max; float y_min = args.y_min; @@ -631,7 +588,6 @@ void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs ar if (valid_count == 0) return; // Early exit if no points // Allocate vertex array - PlotVertex* vertices = (PlotVertex*)malloc(valid_count * sizeof(PlotVertex)); int idx = 0; // Preprocess points: transform and map to screen space @@ -776,8 +732,6 @@ void GuiDropdownFilter(int x, int y, char* options, int *selection, bool *dropdo } } - - void apply_filter(bool* filter, Hyper* param, float min, float max) { for (int i=0; in; i++) { float val = param->ary[i]; From 48c5329959c7c86142731c9b6cb6d0d1d8059296 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 16 Oct 2025 17:51:18 +0000 Subject: [PATCH 065/188] Tooltip prototype --- pufferlib/ocean/constellation/constellation.c | 76 +++++++++++++++++-- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index df69e1a9f..1580d0c7e 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -85,6 +85,7 @@ const Color CONSTELLATION = (Color){255, 255, 255, 128}; const float EMPTY = -4242.0f; #define MAX_PARTICLES 1000 +#define MAX_POINTS 10000 typedef struct Particle { float x; @@ -101,6 +102,18 @@ typedef struct VertexBuffer { int n; } VertexBuffer; +typedef struct { + float x; + float y; + int env_idx; + int ary_idx; +} ScreenPoint; + +typedef struct { + ScreenPoint points[MAX_POINTS]; + int n; +} RenderedPoints; + typedef struct { char *key; float *ary; @@ -561,7 +574,8 @@ void plot_gl(Shader shader, VertexBuffer vertices) { rlSetBlendMode(RL_BLEND_ALPHA); } -void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { +void plot(RenderedPoints* rendered_points, int rxoff, int ryoff, Shader shader, 
Hyper* x, Hyper* y, + bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n); int width = args.width; @@ -611,6 +625,15 @@ void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs ar particles[i].g = c.g/255.0f; particles[i].b = c.b/255.0f; particles[i].a = c.a/255.0f; + + rendered_points->points[rendered_points->n] = (ScreenPoint){ + rxoff + xi, + ryoff + yi, + rendered_points->points[rendered_points->n].env_idx, + idx + }; + rendered_points->n++; + idx++; } @@ -618,7 +641,7 @@ void plot(Shader shader, Hyper* x, Hyper* y, bool log_x, bool log_y, PlotArgs ar plot_gl(shader, buffer); } -void plot3(Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, +void plot3(RenderedPoints* rendered_points, Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { assert(x->n == y->n && x->n == z->n); float x_min = args.x_min; @@ -682,9 +705,17 @@ void plot3(Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, particles[i].g = c.g/255.0f; particles[i].b = c.b/255.0f; particles[i].a = c.a/255.0f; - idx++; + + rendered_points->points[rendered_points->n] = (ScreenPoint){ + screen_pos.x, + screen_pos.y, + rendered_points->points[rendered_points->n].env_idx, + idx + }; + rendered_points->n++; //DrawBillboard(camera, whiteTexture, point, 0.1f, c); + idx++; } VertexBuffer buffer = {&particles, idx}; @@ -892,6 +923,9 @@ int main(void) { strcat(env_options, data.envs[i].key); } + // Rendered points + RenderedPoints rendered_points = {0}; + // Initialize Raylib SetConfigFlags(FLAG_MSAA_4X_HINT); InitWindow(2*DEFAULT_PLOT_ARGS.width, 2*DEFAULT_PLOT_ARGS.height + 2*SETTINGS_HEIGHT, "Puffer Constellation"); @@ -1019,6 +1053,8 @@ int main(void) { Vector2 focus = {0, 0}; while (!WindowShouldClose()) { + rendered_points.n = 0; + BeginDrawing(); ClearBackground(PUFF_BACKGROUND); @@ -1074,7 +1110,10 @@ int main(void) { } } 
//BeginShaderMode(shader); - plot3(camera, shader, x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); + for (int j=0; jn; j++) { + rendered_points.points[rendered_points.n + j].env_idx = i; + } + plot3(&rendered_points, camera, shader, x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); //EndShaderMode(); } @@ -1207,7 +1246,10 @@ int main(void) { cmap[j] = i/(float)data.n; } } - plot(shader, x, y, fig2_x_log, fig2_y_log, args2, cmap, NULL); + for (int j=0; jn; j++) { + rendered_points.points[rendered_points.n + j].env_idx = i; + } + plot(&rendered_points, fig1.texture.width, 0, shader, x, y, fig2_x_log, fig2_y_log, args2, cmap, NULL); } //EndShaderMode(); //rlSetBlendMode(RL_BLEND_ALPHA); @@ -1240,7 +1282,10 @@ int main(void) { apply_filter(filter, filter_param_1, fig3_range1_min_val, fig3_range1_max_val); Hyper* filter_param_2 = get_hyper(&data, env, hyper_key[fig3_range2_idx]); apply_filter(filter, filter_param_2, fig3_range2_min_val, fig3_range2_max_val); - plot(shader, x, y, false, false, args3, cmap, filter); + for (int j=0; jn; j++) { + rendered_points.points[rendered_points.n + j].env_idx = i; + } + plot(&rendered_points, 0, fig1.texture.height, shader, x, y, false, false, args3, cmap, filter); } //draw_axes(args3); @@ -1456,6 +1501,25 @@ int main(void) { GuiDropdownCheckbox(fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig2_y_idx, &fig2_y_active, "Log Y", &fig2_y_log); GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &fig2_log_color); + // Tooltip + float x = focus.x; + float y = focus.y; + float min_dist = FLT_MAX; + int min_i = 0; + printf("N points: %d\n", rendered_points.n); + for (int i=0; i Date: Fri, 17 Oct 2025 19:06:13 +0000 Subject: [PATCH 066/188] Initial refactor --- pufferlib/ocean/constellation/constellation.c | 873 ++++++++---------- 1 file changed, 377 insertions(+), 496 deletions(-) diff --git 
a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 1580d0c7e..b27f53053 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -37,35 +37,12 @@ #include "raymath.h" #define CAMERA_ORBITAL_SPEED 0.025f -#define CAMERA_MOUSE_MOVE_SENSITIVITY 0.005f -#define CAMERA_MOVE_SPEED 5.4f -#define CAMERA_ROTATION_SPEED 0.03f -#define CAMERA_PAN_SPEED 0.2f - - -// Camera mouse movement sensitivity -#define CAMERA_MOUSE_MOVE_SENSITIVITY 0.003f -void CustomUpdateCamera(Camera *camera, int mode) -{ - Vector2 mousePositionDelta = GetMouseDelta(); - - bool moveInWorldPlane = ((mode == CAMERA_FIRST_PERSON) || (mode == CAMERA_THIRD_PERSON)); - bool rotateAroundTarget = ((mode == CAMERA_THIRD_PERSON) || (mode == CAMERA_ORBITAL)); - bool lockView = ((mode == CAMERA_FREE) || (mode == CAMERA_FIRST_PERSON) || (mode == CAMERA_THIRD_PERSON) || (mode == CAMERA_ORBITAL)); - bool rotateUp = false; - - // Camera speeds based on frame time - float cameraMoveSpeed = CAMERA_MOVE_SPEED*GetFrameTime(); - float cameraRotationSpeed = CAMERA_ROTATION_SPEED*GetFrameTime(); - float cameraPanSpeed = CAMERA_PAN_SPEED*GetFrameTime(); +void CustomUpdateCamera(Camera *camera, float orbitSpeed) { float cameraOrbitalSpeed = CAMERA_ORBITAL_SPEED*GetFrameTime(); - - // Orbital can just orbit Matrix rotation = MatrixRotate(GetCameraUp(camera), cameraOrbitalSpeed); Vector3 view = Vector3Subtract(camera->position, camera->target); view = Vector3Transform(view, rotation); camera->position = Vector3Add(camera->target, view); - // Zoom target distance CameraMoveToTarget(camera, -GetMouseWheelMove()); if (IsKeyPressed(KEY_KP_SUBTRACT)) CameraMoveToTarget(camera, 2.0f); if (IsKeyPressed(KEY_KP_ADD)) CameraMoveToTarget(camera, -2.0f); @@ -75,19 +52,15 @@ void CustomUpdateCamera(Camera *camera, int mode) #define TOGGLE_WIDTH 60 #define DROPDOWN_WIDTH 136 -const Color PUFF_RED = (Color){187, 0, 0, 255}; const Color 
PUFF_CYAN = (Color){0, 187, 187, 255}; const Color PUFF_WHITE = (Color){241, 241, 241, 241}; const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255}; -const Color TRANSPARENT = (Color){0, 0, 0, 0}; const Color CONSTELLATION = (Color){255, 255, 255, 128}; -const float EMPTY = -4242.0f; - -#define MAX_PARTICLES 1000 +#define MAX_PARTICLES 10000 #define MAX_POINTS 10000 -typedef struct Particle { +typedef struct Glyph { float x; float y; float i; @@ -95,24 +68,23 @@ typedef struct Particle { float g; float b; float a; -} Particle; +} Glyph; -typedef struct VertexBuffer { - float* vertices; - int n; -} VertexBuffer; +typedef struct Point { + float x; + float y; + float z; + float c; +} Point; typedef struct { + float click_x; + float click_y; float x; float y; int env_idx; int ary_idx; -} ScreenPoint; - -typedef struct { - ScreenPoint points[MAX_POINTS]; - int n; -} RenderedPoints; +} Tooltip; typedef struct { char *key; @@ -158,6 +130,12 @@ typedef struct PlotArgs { float y_max; float z_min; float z_max; + float c_min; + float c_max; + bool log_x; + bool log_y; + bool log_z; + bool log_c; int width; int height; int title_font_size; @@ -179,15 +157,16 @@ typedef struct PlotArgs { char* z_label; Font font; Font font_small; + Camera3D camera; } PlotArgs; PlotArgs DEFAULT_PLOT_ARGS = { - .x_min = EMPTY, - .x_max = EMPTY, - .y_min = EMPTY, - .y_max = EMPTY, - .z_min = EMPTY, - .z_max = EMPTY, + .x_min = 0.0f, + .x_max = 0.0f, + .y_min = 0.0f, + .y_max = 0.0f, + .z_min = 0.0f, + .z_max = 0.0f, .width = 960, .height = 540 - SETTINGS_HEIGHT, .title_font_size = 32, @@ -209,14 +188,11 @@ PlotArgs DEFAULT_PLOT_ARGS = { .z_label = "Train/Learning Rate", }; -float signed_log10(float x) { - if (fabs(x) < 1e-8) { - return -8.0f; - } - if (x > 0) { - return log10(x); +float safe_log10(float x) { + if (x <= 0) { + return x; } - return -log10(-x); + return log10(x); } const char* format_tick_label(double value) { @@ -280,172 +256,144 @@ void draw_labels(PlotArgs args) { ); } +void 
draw_x_tick(char* label, float x_pos, PlotArgs args) { + float y_pos = args.height - args.bottom_margin; + DrawLine( + x_pos, + y_pos - args.tick_length, + x_pos, + y_pos + args.tick_length, + args.axis_color + ); + Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_tick_font_size, 0); + DrawTextEx( + args.font_small, + label, + (Vector2){ + x_pos - this_tick_size.x/2, + y_pos + args.tick_length + args.tick_margin, + }, + args.axis_tick_font_size, + 0, + PUFF_WHITE + ); +} + +void draw_y_tick(char* label, float y_pos, PlotArgs args) { + DrawLine( + args.left_margin - args.tick_length, + y_pos, + args.left_margin + args.tick_length, + y_pos, + args.axis_color + ); + Vector2 this_tick_size = MeasureTextEx(args.font, label, args.axis_tick_font_size, 0); + DrawTextEx( + args.font_small, + label, + (Vector2){ + args.left_margin - this_tick_size.x - args.tick_length - args.tick_margin, + y_pos - this_tick_size.y/2, + }, + args.axis_tick_font_size, + 0, + PUFF_WHITE + ); +} -void draw_ticks(PlotArgs args) { +Vector2 compute_ticks(PlotArgs args) { int width = args.width; int height = args.height; float plot_width = width - args.left_margin - args.right_margin; float plot_height = height - args.top_margin - args.bottom_margin; - // Autofit number of ticks Vector2 tick_label_size = MeasureTextEx(args.font, "estimate", args.axis_font_size, 0); int num_x_ticks = 1 + plot_width/tick_label_size.x; int num_y_ticks = 1 + plot_height/tick_label_size.y; - // X ticks - for (int i=0; ilocs[SHADER_LOC_VERTEX_POSITION], 3, GL_FLOAT, GL_FALSE, sizeof(Glyph), 0); + glEnableVertexAttribArray(shader->locs[SHADER_LOC_VERTEX_POSITION]); + int vertexColorLoc = shader->locs[SHADER_LOC_VERTEX_COLOR]; + glVertexAttribPointer(vertexColorLoc, 4, GL_FLOAT, GL_FALSE, sizeof(Glyph), (void*)(3*sizeof(float))); glEnableVertexAttribArray(vertexColorLoc); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindVertexArray(0); - - rlDrawRenderBatchActive(); // Draw iternal buffers data (previous draw 
calls) + rlDrawRenderBatchActive(); rlSetBlendMode(RL_BLEND_ADDITIVE); - - int currentTimeLoc = GetShaderLocation(shader, "currentTime"); - glUseProgram(shader.id); + int currentTimeLoc = GetShaderLocation(*shader, "currentTime"); + glUseProgram(shader->id); glUniform1f(currentTimeLoc, GetTime()); Matrix modelViewProjection = MatrixMultiply(rlGetMatrixModelview(), rlGetMatrixProjection()); - glUniformMatrix4fv(shader.locs[SHADER_LOC_MATRIX_MVP], 1, false, MatrixToFloat(modelViewProjection)); + glUniformMatrix4fv(shader->locs[SHADER_LOC_MATRIX_MVP], 1, false, MatrixToFloat(modelViewProjection)); glBindVertexArray(vao); glDrawArrays(GL_POINTS, 0, n); glBindVertexArray(0); @@ -574,156 +519,6 @@ void plot_gl(Shader shader, VertexBuffer vertices) { rlSetBlendMode(RL_BLEND_ALPHA); } -void plot(RenderedPoints* rendered_points, int rxoff, int ryoff, Shader shader, Hyper* x, Hyper* y, - bool log_x, bool log_y, PlotArgs args, float* cmap, bool* filter) { - assert(x->n == y->n); - - int width = args.width; - int height = args.height; - - float plot_width = width - args.left_margin - args.right_margin; - float plot_height = height - args.top_margin - args.bottom_margin; - - // Compute ranges and apply log scaling if needed - float x_min = args.x_min; - float x_max = args.x_max; - float y_min = args.y_min; - float y_max = args.y_max; - - float dx = x_max - x_min; - float dy = y_max - y_min; - - // Count valid points after filtering - int valid_count = 0; - for (int i = 0; i < x->n; i++) { - if (filter == NULL || filter[i]) valid_count++; - } - - if (valid_count == 0) return; // Early exit if no points - - // Allocate vertex array - int idx = 0; - - // Preprocess points: transform and map to screen space - Particle particles[MAX_PARTICLES] = { 0 }; - for (int i = 0; i < x->n; i++) { - if (filter != NULL && !filter[i]) continue; - - // Apply log scaling - float xi = log_x ? log10f(x->ary[i]) : x->ary[i]; - float yi = log_y ? 
log10f(y->ary[i]) : y->ary[i]; - - // Map to screen coordinates with margins - xi = args.left_margin + (xi - x_min) / dx * plot_width; - yi = args.height - args.bottom_margin - (yi - y_min) / dy * plot_height; - - particles[i].x = xi; - particles[i].y = yi; - particles[i].i = i; - Color c = rgb(cmap[i]); - particles[i].r = c.r/255.0f; - particles[i].g = c.g/255.0f; - particles[i].b = c.b/255.0f; - particles[i].a = c.a/255.0f; - - rendered_points->points[rendered_points->n] = (ScreenPoint){ - rxoff + xi, - ryoff + yi, - rendered_points->points[rendered_points->n].env_idx, - idx - }; - rendered_points->n++; - - idx++; - } - - VertexBuffer buffer = {&particles, MAX_PARTICLES}; - plot_gl(shader, buffer); -} - -void plot3(RenderedPoints* rendered_points, Camera3D camera, Shader shader, Hyper* x, Hyper* y, Hyper* z, - bool log_x, bool log_y, bool log_z, PlotArgs args, float* cmap, bool* filter) { - assert(x->n == y->n && x->n == z->n); - float x_min = args.x_min; - float x_max = args.x_max; - float y_min = args.y_min; - float y_max = args.y_max; - float z_min = args.z_min; - float z_max = args.z_max; - - if (log_x) { - x_min = signed_log10(x_min); - x_max = signed_log10(x_max); - } - if (log_y) { - y_min = signed_log10(y_min); - y_max = signed_log10(y_max); - } - if (log_z) { - z_min = signed_log10(z_min); - z_max = signed_log10(z_max); - } - - float dx = x_max - x_min; - float dy = y_max - y_min; - float dz = z_max - z_min; - - Particle particles[MAX_PARTICLES] = { 0 }; - int idx = 0; - // Plot lines - for (int i = 0; i < x->n; i++) { - if (filter != NULL && !filter[i]) { - continue; - } - float xi = (log_x) ? signed_log10(x->ary[i]) : x->ary[i]; - float yi = (log_y) ? signed_log10(y->ary[i]) : y->ary[i]; - float zi = (log_z) ? 
signed_log10(z->ary[i]) : z->ary[i]; - - Color c = rgb(cmap[i]); - Vector3 point = (Vector3){(xi - x_min)/dx, (yi - y_min)/dy, (zi - z_min)/dz}; - - /* - DrawCube( - (Vector3){(xi - x_min)/dx, (yi - y_min)/dy, (zi - z_min)/dz}, - 0.02f, 0.02f, 0.02f, c - ); - - DrawSphere( - (Vector3){(xi - x_min)/dx, (yi - y_min)/dy, (zi - z_min)/dz}, - 0.02f, c - ); - */ - - // Project to screen space - Vector2 screen_pos = GetWorldToScreenEx(point, camera, 960, 520); - - particles[i].x = screen_pos.x; - particles[i].y = screen_pos.y; - particles[i].i = i; - c = rgb(cmap[i]); - particles[i].r = c.r/255.0f; - particles[i].g = c.g/255.0f; - particles[i].b = c.b/255.0f; - particles[i].a = c.a/255.0f; - - rendered_points->points[rendered_points->n] = (ScreenPoint){ - screen_pos.x, - screen_pos.y, - rendered_points->points[rendered_points->n].env_idx, - idx - }; - rendered_points->n++; - - //DrawBillboard(camera, whiteTexture, point, 0.1f, c); - idx++; - - } - VertexBuffer buffer = {&particles, idx}; - plot_gl(shader, buffer); - -} - - int cleanup(Hyper *map, int map_count, cJSON *root, char *json_str) { if (map) { for (int i=0; in; i++) { - float val = param->ary[i]; - if (log) { - val = signed_log10(val); + return (val - min)/(max - min); +} + +void autoscale(Point* points, int size, PlotArgs *args) { + float x_min = FLT_MAX; + float x_max = -FLT_MAX; + for (int i=0; i x_max) x_max = xi; + } + args->x_min = x_min; + args->x_max = x_max; + + float y_min = FLT_MAX; + float y_max = -FLT_MAX; + for (int i=0; i y_max) y_max = yi; + } + args->y_min = y_min; + args->y_max = y_max; + + float z_min = FLT_MAX; + float z_max = -FLT_MAX; + for (int i=0; i z_max) z_max = zi; + } + args->z_min = z_min; + args->z_max = z_max; + + float c_min = FLT_MAX; + float c_max = -FLT_MAX; + for (int i=0; i c_max) c_max = ci; + } + args->c_min = c_min; + args->c_max = c_max; +} + +void toPx(Point *points, Glyph* glyphs, int size, PlotArgs args) { + float x_min = args.log_x ? 
safe_log10(args.x_min) : args.x_min; + float x_max = args.log_x ? safe_log10(args.x_max) : args.x_max; + float y_min = args.log_y ? safe_log10(args.y_min) : args.y_min; + float y_max = args.log_y ? safe_log10(args.y_max) : args.y_max; + float z_min = args.log_z ? safe_log10(args.z_min) : args.z_min; + float z_max = args.log_z ? safe_log10(args.z_max) : args.z_max; + float c_min = args.log_c ? safe_log10(args.c_min) : args.c_min; + float c_max = args.log_c ? safe_log10(args.c_max) : args.c_max; + + float dx = x_max - x_min; + float dy = y_max - y_min; + float dz = z_max - z_min; + + for (int i = 0; i < size; i++) { + Point p = points[i]; + float xi = (args.log_x) ? safe_log10(p.x) : p.x; + float yi = (args.log_y) ? safe_log10(p.y) : p.y; + float zi = (args.log_z) ? safe_log10(p.z) : p.z; + float px, py; + + if (args.z_min != 0 || args.z_max != 0) { + Vector3 v = (Vector3){ + (xi - x_min)/dx, + (yi - y_min)/dy, + (zi - z_min)/dz + }; + assert(args.camera.fovy != 0); + Vector2 screen_pos = GetWorldToScreenEx(v, args.camera, args.width, args.height); + px = screen_pos.x; + py = screen_pos.y; + } else { + px = args.left_margin + (xi - x_min) / dx * args.width; + py = args.height - args.bottom_margin - (yi - y_min) / dy * args.height; + } + + float cmap = points[i].c; + if (args.log_c) { + cmap = safe_log10(cmap); + } + cmap = (cmap - c_min)/(c_max - c_min); + Color c = rgb(cmap); + glyphs[i] = (Glyph){ + px, + py, + i, + c.r/255.0f, + c.g/255.0f, + c.b/255.0f, + c.a/255.0f, + }; + } +} + +void update_closest(Tooltip* tooltip, Vector2 *indices, Glyph* glyphs, int size, float x_offset, float y_offset) { + float dx = tooltip->click_x - tooltip->x; + float dy = tooltip->click_y - tooltip->y; + float dist = sqrt(dx*dx + dy*dy); + + for (int i=0; iclick_x; + dy = y_offset + glyphs[i].y - tooltip->click_y; + float d = sqrt(dx*dx + dy*dy); + if (d < dist) { + dist = d; + tooltip->x = x_offset + glyphs[i].x; + tooltip->y = y_offset + glyphs[i].y; + tooltip->env_idx = 
indices[i].x; + tooltip->ary_idx = indices[i].y; } - cmap[i] = (val - c_min)/(c_max - c_min); } } @@ -923,8 +828,10 @@ int main(void) { strcat(env_options, data.envs[i].key); } - // Rendered points - RenderedPoints rendered_points = {0}; + // Points + Point* points = calloc(MAX_POINTS, sizeof(Point)); + Glyph* glyphs = calloc(MAX_PARTICLES, sizeof(Glyph)); + Vector2* env_indices = calloc(MAX_POINTS, sizeof(Vector2)); // Initialize Raylib SetConfigFlags(FLAG_MSAA_4X_HINT); @@ -952,45 +859,42 @@ int main(void) { #endif Camera3D camera = (Camera3D){ 0 }; - camera.position = (Vector3){ 1.5f, 1.25f, 1.5f }; - camera.target = (Vector3){ 0.5f, 0.5f, 0.5f }; - camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; - camera.fovy = 45.0f; - camera.projection = CAMERA_PERSPECTIVE; PlotArgs args1 = DEFAULT_PLOT_ARGS; + args1.camera = (Camera3D){ 0 }; + args1.camera.position = (Vector3){ 1.5f, 1.25f, 1.5f }; + args1.camera.target = (Vector3){ 0.5f, 0.5f, 0.5f }; + args1.camera.up = (Vector3){ 0.0f, 1.0f, 0.0f }; + args1.camera.fovy = 45.0f; + args1.camera.projection = CAMERA_PERSPECTIVE; + args1.log_x = true; + args1.log_z = true; RenderTexture2D fig1 = LoadRenderTexture(args1.width, args1.height); RenderTexture2D fig1_overlay = LoadRenderTexture(args1.width, args1.height); int fig1_env_idx = 0; bool fig1_env_active = false; bool fig1_x_active = false; int fig1_x_idx = 0; - bool fig1_x_log = true; bool fig1_y_active = false; int fig1_y_idx = 2; - bool fig1_y_log = false; bool fig1_z_active = false; int fig1_z_idx = 1; - bool fig1_z_log = true; int fig1_color_idx = 0; bool fig1_color_active = false; - bool fig1_log_color = true; PlotArgs args2 = DEFAULT_PLOT_ARGS; RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); //SetTextureFilter(fig2.texture, TEXTURE_FILTER_POINT); args2.left_margin = 50; args2.right_margin = 50; + args2.log_x = true; int fig2_env_idx = 1; bool fig2_env_active = false; bool fig2_x_active = false; int fig2_x_idx = 1; - bool fig2_x_log = true; bool 
fig2_y_active = false; int fig2_y_idx = 2; - bool fig2_y_log = false; int fig2_color_idx = 1; bool fig2_color_active = false; - bool fig2_log_color = true; PlotArgs args3 = DEFAULT_PLOT_ARGS; RenderTexture2D fig3 = LoadRenderTexture(args3.width, args3.height); @@ -999,6 +903,8 @@ int main(void) { args3.right_margin = 10; args3.top_margin = 10; args3.bottom_margin = 10; + args3.x_label = "tsne1"; + args3.y_label = "tsne2"; bool fig3_range1_active = false; int fig3_range1_idx = 2; char fig3_range1_min[32]; @@ -1014,6 +920,14 @@ int main(void) { PlotArgs args4 = DEFAULT_PLOT_ARGS; RenderTexture2D fig4 = LoadRenderTexture(args4.width, args4.height); + args4.x_label = "Value"; + args4.y_label = "Hyperparameter"; + args4.left_margin = 170; + args4.right_margin = 50; + args4.top_margin = 10; + args4.bottom_margin = 50; + args4.x_min = 1e-8; + args4.x_max = 1e8; bool fig4_x_log = true; bool fig4_range1_active = false; int fig4_range1_idx = 2; @@ -1043,23 +957,26 @@ int main(void) { Hyper* x; Hyper* y; Hyper* z; + Hyper* c; char* x_label; char* y_label; char* z_label; bool *filter = calloc(max_data_points, sizeof(bool)); - float *cmap = calloc(max_data_points, sizeof(float)); + Tooltip tooltip = {0}; Vector2 focus = {0, 0}; while (!WindowShouldClose()) { - rendered_points.n = 0; + int screen_points_count = 0; BeginDrawing(); ClearBackground(PUFF_BACKGROUND); if (IsMouseButtonPressed(MOUSE_LEFT_BUTTON)) { focus = GetMousePosition(); + tooltip.click_x = focus.x; + tooltip.click_y = focus.y; } // Figure 1 @@ -1071,51 +988,37 @@ int main(void) { args1.z_label = z_label; int start = 0; int end = data.n; - float c_min = 0.0f; - float c_max = 1.0f; if (fig1_env_idx != 0) { start = fig1_env_idx - 1; end = fig1_env_idx; } - args1.x_min = hyper_min(&data, hyper_key[fig1_x_idx], start, end); - args1.x_max = hyper_max(&data, hyper_key[fig1_x_idx], start, end); - args1.y_min = hyper_min(&data, hyper_key[fig1_y_idx], start, end); - args1.y_max = hyper_max(&data, hyper_key[fig1_y_idx], 
start, end); - args1.z_min = hyper_min(&data, hyper_key[fig1_z_idx], start, end); - args1.z_max = hyper_max(&data, hyper_key[fig1_z_idx], start, end); - float x_mid = fig1_x_log ? (log10(args1.x_max) + log10(args1.x_min))/2.0f : (args1.x_max + args1.x_min)/2.0f; - float y_mid = fig1_y_log ? (log10(args1.y_max) + log10(args1.y_min))/2.0f : (args1.y_max + args1.y_min)/2.0f; - float z_mid = fig1_z_log ? (log10(args1.z_max) + log10(args1.z_min))/2.0f : (args1.z_max + args1.z_min)/2.0f; - //camera.target = (Vector3){x_mid, y_mid, z_mid}; BeginTextureMode(fig1); ClearBackground(PUFF_BACKGROUND); - if (fig1_color_idx != 0) { - c_min = hyper_min(&data, hyper_key[fig1_color_idx - 1], start, end); - c_max = hyper_max(&data, hyper_key[fig1_color_idx - 1], start, end); - } - memset(cmap, 0.0f, data.n * sizeof(float)); - Hyper* color_param = NULL; + int size = 0; for (int i=start; in; j++) { - cmap[j] = i/(float)data.n; - } + c = get_hyper(&data, env, hyper_key[fig1_color_idx - 1]); } - //BeginShaderMode(shader); for (int j=0; jn; j++) { - rendered_points.points[rendered_points.n + j].env_idx = i; + points[size] = (Point){ + x->ary[j], + y->ary[j], + z->ary[j], + (fig1_color_idx == 0) ? 
i/(float)data.n : c->ary[j], + }; + env_indices[size] = (Vector2){i, j}; + size++; } - plot3(&rendered_points, camera, shader, x, y, z, fig1_x_log, fig1_y_log, fig1_z_log, args1, cmap, NULL); - //EndShaderMode(); } + autoscale(points, size, &args1); + toPx(points, glyphs, size, args1); + update_closest(&tooltip, env_indices, glyphs, size, 0, 2*SETTINGS_HEIGHT); + plot_gl(glyphs, size, &shader); // Find best hypers float tsne_thresh = 100.0f; @@ -1192,9 +1095,9 @@ int main(void) { } } - BeginMode3D(camera); - CustomUpdateCamera(&camera, CAMERA_ORBITAL); - //draw_axes3(args1, fig1_x_log, fig1_y_log, fig1_z_log); + BeginMode3D(args1.camera); + CustomUpdateCamera(&args1.camera, CAMERA_ORBITAL_SPEED); + draw_axes3(); EndMode3D(); EndTextureMode(); @@ -1209,72 +1112,46 @@ int main(void) { start = 0; end = data.n; - c_min = 0.0f; - c_max = 1.0f; if (fig2_env_idx != 0) { start = fig2_env_idx - 1; end = fig2_env_idx; } - - args2.x_min = hyper_min(&data, hyper_key[fig2_x_idx], start, end); - args2.x_max = hyper_max(&data, hyper_key[fig2_x_idx], start, end); - args2.y_min = hyper_min(&data, hyper_key[fig2_y_idx], start, end); - args2.y_max = hyper_max(&data, hyper_key[fig2_y_idx], start, end); - args2.x_min = (fig2_x_log) ? log10(args2.x_min) : args2.x_min; - args2.x_max = (fig2_x_log) ? log10(args2.x_max) : args2.x_max; - args2.y_min = (fig2_y_log) ? log10(args2.y_min) : args2.y_min; - args2.y_max = (fig2_y_log) ? 
log10(args2.y_max) : args2.y_max; - - if (fig2_color_idx != 0) { - c_min = hyper_min(&data, hyper_key[fig2_color_idx - 1], start, end); - c_max = hyper_max(&data, hyper_key[fig2_color_idx - 1], start, end); - } - memset(cmap, 0.0f, data.n * sizeof(float)); - color_param = NULL; - - //rlSetBlendMode(RL_BLEND_ADDITIVE); - //BeginShaderMode(shader); + size = 0; for (int i=start; in; j++) { - cmap[j] = i/(float)data.n; - } + c = get_hyper(&data, env, hyper_key[fig2_color_idx - 1]); } for (int j=0; jn; j++) { - rendered_points.points[rendered_points.n + j].env_idx = i; + points[size] = (Point){ + x->ary[j], + y->ary[j], + 0.0f, + (fig2_color_idx == 0) ? i/(float)data.n : c->ary[j], + }; + env_indices[size] = (Vector2){i, j}; + size++; } - plot(&rendered_points, fig1.texture.width, 0, shader, x, y, fig2_x_log, fig2_y_log, args2, cmap, NULL); } - //EndShaderMode(); - //rlSetBlendMode(RL_BLEND_ALPHA); + autoscale(points, size, &args2); + toPx(points, glyphs, size, args2); + update_closest(&tooltip, env_indices, glyphs, size, fig1.texture.width, 2*SETTINGS_HEIGHT); + plot_gl(glyphs, size, &shader); draw_axes(args2); - draw_ticks(args2); + draw_all_ticks(args2); EndTextureMode(); // Figure 3 - args3.x_label = "tsne1"; - args3.y_label = "tsne2"; - args3.x_min = hyper_min(&data, "tsne1", 0, data.n); - args3.x_max = hyper_max(&data, "tsne1", 0, data.n); - args3.y_min = hyper_min(&data, "tsne2", 0, data.n); - args3.y_max = hyper_max(&data, "tsne2", 0, data.n); BeginTextureMode(fig3); ClearBackground(PUFF_BACKGROUND); - + size = 0; for (int i=0; in; j++) { - cmap[j] = i/(float)data.n; - } for (int j=0; jn; j++) { filter[j] = true; } @@ -1282,24 +1159,30 @@ int main(void) { apply_filter(filter, filter_param_1, fig3_range1_min_val, fig3_range1_max_val); Hyper* filter_param_2 = get_hyper(&data, env, hyper_key[fig3_range2_idx]); apply_filter(filter, filter_param_2, fig3_range2_min_val, fig3_range2_max_val); + for (int j=0; jn; j++) { - rendered_points.points[rendered_points.n + 
j].env_idx = i; + if (!filter[j]) { + continue; + } + points[size] = (Point){ + x->ary[j], + y->ary[j], + 0.0f, + i/(float)data.n + }; + env_indices[size] = (Vector2){i, j}; + size++; } - plot(&rendered_points, 0, fig1.texture.height, shader, x, y, false, false, args3, cmap, filter); } + autoscale(points, size, &args3); + toPx(points, glyphs, size, args3); + update_closest(&tooltip, env_indices, glyphs, size, 0, fig1.texture.height + 2*SETTINGS_HEIGHT); + plot_gl(glyphs, size, &shader); //draw_axes(args3); EndTextureMode(); // Figure 4 - args4.x_label = "Value"; - args4.y_label = "Hyperparameter"; - args4.left_margin = 170; - args4.right_margin = 50; - args4.top_margin = 10; - args4.bottom_margin = 50; - args4.x_min = 1e-8; - args4.x_max = 1e8; BeginTextureMode(fig4); ClearBackground(PUFF_BACKGROUND); rlSetBlendFactorsSeparate(0x0302, 0x0303, 1, 0x0303, 0x8006, 0x8006); @@ -1328,14 +1211,14 @@ int main(void) { DrawTextureRec( fig1.texture, (Rectangle){0, 0, fig1.texture.width, -fig1.texture.height }, - (Vector2){ 0, SETTINGS_HEIGHT }, WHITE + (Vector2){ 0, 2*SETTINGS_HEIGHT }, WHITE ); BeginShaderMode(blur_shader); rlSetBlendMode(RL_BLEND_ADDITIVE); DrawTextureRec( fig1_overlay.texture, (Rectangle){0, 0, fig1_overlay.texture.width, -fig1_overlay.texture.height }, - (Vector2){ 0, SETTINGS_HEIGHT }, WHITE + (Vector2){ 0, 2*SETTINGS_HEIGHT }, WHITE ); rlSetBlendMode(RL_BLEND_ALPHA); EndShaderMode(); @@ -1365,12 +1248,12 @@ int main(void) { ); // Figure 1 Overlay - float x_min = (fig1_x_log) ? log10(args1.x_min) : args1.x_min; - float x_max = (fig1_x_log) ? log10(args1.x_max) : args1.x_max; - float y_min = (fig1_y_log) ? log10(args1.y_min) : args1.y_min; - float y_max = (fig1_y_log) ? log10(args1.y_max) : args1.y_max; - float z_min = (fig1_z_log) ? log10(args1.z_min) : args1.z_min; - float z_max = (fig1_z_log) ? log10(args1.z_max) : args1.z_max; + float x_min = (args1.log_x) ? safe_log10(args1.x_min) : args1.x_min; + float x_max = (args1.log_x) ? 
safe_log10(args1.x_max) : args1.x_max; + float y_min = (args1.log_y) ? safe_log10(args1.y_min) : args1.y_min; + float y_max = (args1.log_y) ? safe_log10(args1.y_max) : args1.y_max; + float z_min = (args1.log_z) ? safe_log10(args1.z_min) : args1.z_min; + float z_max = (args1.log_z) ? safe_log10(args1.z_max) : args1.z_max; for (int k=0; k<4; k++) { int bsi = best_srci[k]; char* src_env = data.envs[bsi].key; @@ -1382,9 +1265,9 @@ int main(void) { float yi = y->ary[src_idx]; float zi = z->ary[src_idx]; - xi = (fig1_x_log) ? signed_log10(xi) : xi; - yi = (fig1_y_log) ? signed_log10(yi) : yi; - zi = (fig1_z_log) ? signed_log10(zi) : zi; + xi = (args1.log_x) ? safe_log10(xi) : xi; + yi = (args1.log_y) ? safe_log10(yi) : yi; + zi = (args1.log_z) ? safe_log10(zi) : zi; Vector3 src_point = (Vector3){ (xi - x_min)/(x_max - x_min), @@ -1407,9 +1290,9 @@ int main(void) { float yj = y->ary[bdi]; float zj = z->ary[bdi]; - xj = (fig1_x_log) ? signed_log10(xj) : xj; - yj = (fig1_y_log) ? signed_log10(yj) : yj; - zj = (fig1_z_log) ? signed_log10(zj) : zj; + xj = (args1.log_x) ? safe_log10(xj) : xj; + yj = (args1.log_y) ? safe_log10(yj) : yj; + zj = (args1.log_z) ? 
safe_log10(zj) : zj; Vector3 dst_point = (Vector3){ (xj - x_min)/(x_max - x_min), @@ -1463,7 +1346,7 @@ int main(void) { ); } - float tsne_thresh_px = sqrt(tsne_thresh)*args3.width/(args3.x_max - args3.x_min); + //float tsne_thresh_px = sqrt(tsne_thresh)*args3.width/(args3.x_max - args3.x_min); //DrawCircleLines(xi, yi, tsne_thresh_px, CONSTELLATION); } @@ -1487,39 +1370,37 @@ int main(void) { if (GuiDropdownBox(fig1_env_rect, env_options, &fig1_env_idx, fig1_env_active)){ fig1_env_active = !fig1_env_active; } - GuiDropdownCheckbox(DROPDOWN_WIDTH, 0, options, &fig1_x_idx, &fig1_x_active, "Log X", &fig1_x_log); - GuiDropdownCheckbox(2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig1_y_idx, &fig1_y_active, "Log Y", &fig1_y_log); - GuiDropdownCheckbox(3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, options, &fig1_z_idx, &fig1_z_active, "Log Z", &fig1_z_log); - GuiDropdownCheckbox(4*DROPDOWN_WIDTH + 3*TOGGLE_WIDTH, 0, env_hyper_options, &fig1_color_idx, &fig1_color_active, "Log Color", &fig1_log_color); + GuiDropdownCheckbox(DROPDOWN_WIDTH, 0, options, &fig1_x_idx, &fig1_x_active, "Log X", &args1.log_x); + GuiDropdownCheckbox(2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig1_y_idx, &fig1_y_active, "Log Y", &args1.log_y); + GuiDropdownCheckbox(3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, options, &fig1_z_idx, &fig1_z_active, "Log Z", &args1.log_z); + GuiDropdownCheckbox(4*DROPDOWN_WIDTH + 3*TOGGLE_WIDTH, 0, env_hyper_options, &fig1_color_idx, &fig1_color_active, "Log Color", &args1.log_c); // Figure 2 UI Rectangle fig2_env_rect = {fig1.texture.width, 0, DROPDOWN_WIDTH, SETTINGS_HEIGHT}; if (GuiDropdownBox(fig2_env_rect, env_options, &fig2_env_idx, fig2_env_active)){ fig2_env_active = !fig2_env_active; } - GuiDropdownCheckbox(fig1.texture.width + DROPDOWN_WIDTH, 0, options, &fig2_x_idx, &fig2_x_active, "Log X", &fig2_x_log); - GuiDropdownCheckbox(fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig2_y_idx, &fig2_y_active, "Log Y", &fig2_y_log); - 
GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &fig2_log_color); + GuiDropdownCheckbox(fig1.texture.width + DROPDOWN_WIDTH, 0, options, &fig2_x_idx, &fig2_x_active, "Log X", &args2.log_x); + GuiDropdownCheckbox(fig1.texture.width + 2*DROPDOWN_WIDTH + TOGGLE_WIDTH, 0, options, &fig2_y_idx, &fig2_y_active, "Log Y", &args2.log_y); + GuiDropdownCheckbox(fig1.texture.width + 3*DROPDOWN_WIDTH + 2*TOGGLE_WIDTH, 0, env_hyper_options, &fig2_color_idx, &fig2_color_active, "Log Color", &args2.log_c); // Tooltip - float x = focus.x; - float y = focus.y; - float min_dist = FLT_MAX; - int min_i = 0; - printf("N points: %d\n", rendered_points.n); - for (int i=0; ikey; + DrawTextEx( + args1.font_small, + TextFormat("%s[%d]", env_key, ary_idx), + (Vector2){ + tooltip.x, + tooltip.y, + }, + args1.axis_tick_font_size, + 0, + WHITE + ); //DrawFPS(GetScreenWidth() - 95, 10); EndDrawing(); From 33bd507836732f88227e83b3e6d6f0625eac341a Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 17 Oct 2025 20:55:00 +0000 Subject: [PATCH 067/188] tooltip --- pufferlib/ocean/constellation/constellation.c | 58 ++++++++++++++----- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index b27f53053..ba812f46a 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -84,6 +84,7 @@ typedef struct { float y; int env_idx; int ary_idx; + bool active; } Tooltip; typedef struct { @@ -691,6 +692,21 @@ void update_closest(Tooltip* tooltip, Vector2 *indices, Glyph* glyphs, int size, } } +void copy_hypers_to_clipboard(Env *env, char* buffer, int ary_idx) { + char* start = buffer; + for (int hyper_idx = 0; hyper_idx < env->n; hyper_idx++) { + Hyper *hyper = &env->hypers[hyper_idx]; + char *slash = strchr(hyper->key, '/'); + if (!slash) { + continue; 
+ } + char* suffix = slash + 1; + buffer += sprintf(buffer, "%s = %f\n", suffix, hyper->ary[ary_idx]); + } + buffer[0] = '\0'; + SetClipboardText(start); +} + int main(void) { FILE *file = fopen("pufferlib/ocean/constellation/all_cache.json", "r"); if (!file) { @@ -828,6 +844,8 @@ int main(void) { strcat(env_options, data.envs[i].key); } + char* clipboard = malloc(1024); + // Points Point* points = calloc(MAX_POINTS, sizeof(Point)); Glyph* glyphs = calloc(MAX_PARTICLES, sizeof(Glyph)); @@ -965,18 +983,26 @@ int main(void) { bool *filter = calloc(max_data_points, sizeof(bool)); Tooltip tooltip = {0}; + Vector2 focus = {0, 0}; while (!WindowShouldClose()) { int screen_points_count = 0; + bool right_clicked = false; BeginDrawing(); ClearBackground(PUFF_BACKGROUND); if (IsMouseButtonPressed(MOUSE_LEFT_BUTTON)) { focus = GetMousePosition(); - tooltip.click_x = focus.x; - tooltip.click_y = focus.y; + tooltip.active = false; + } + if (IsMouseButtonPressed(MOUSE_RIGHT_BUTTON)) { + Vector2 mouse_pos = GetMousePosition(); + right_clicked = true; + tooltip.active = true; + tooltip.click_x = mouse_pos.x; + tooltip.click_y = mouse_pos.y; } // Figure 1 @@ -1390,20 +1416,26 @@ int main(void) { Env* env = &data.envs[env_idx]; char* env_key = env->key; - DrawTextEx( - args1.font_small, - TextFormat("%s[%d]", env_key, ary_idx), - (Vector2){ - tooltip.x, - tooltip.y, - }, - args1.axis_tick_font_size, - 0, - WHITE - ); + float cost = get_hyper(&data, env_key, "cost")->ary[ary_idx]; + float score = get_hyper(&data, env_key, "environment/score")->ary[ary_idx]; + float steps = get_hyper(&data, env_key, "agent_steps")->ary[ary_idx]; + + if (tooltip.active) { + char* text = TextFormat("%s\nscore = %f\ncost = %f\nsteps = %f", env_key, score, cost, steps); + Vector2 text_size = MeasureTextEx(args1.font_small, text, args1.axis_tick_font_size, 0); + DrawRectangle(tooltip.x, tooltip.y, text_size.x + 4, text_size.y + 4, PUFF_BACKGROUND); + DrawCircle(tooltip.x, tooltip.y, 2, PUFF_CYAN); + 
DrawTextEx(args1.font_small, text, (Vector2){tooltip.x + 2, tooltip.y + 2}, args1.axis_tick_font_size, 0, WHITE); + } //DrawFPS(GetScreenWidth() - 95, 10); EndDrawing(); + + // Copy hypers to clipboard + int total_len = 0; + if (right_clicked) { + copy_hypers_to_clipboard(env, clipboard, ary_idx); + } } UnloadShader(shader); From 21d66c7b8b5926641eb273f8905843642398e3e4 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 18 Oct 2025 01:12:33 +0000 Subject: [PATCH 068/188] Latest --- pufferlib/ocean/constellation/constellation.c | 327 ++++++++++-------- 1 file changed, 190 insertions(+), 137 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index ba812f46a..1303a659e 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -36,7 +36,7 @@ #include "rlgl.h" #include "raymath.h" -#define CAMERA_ORBITAL_SPEED 0.025f +#define CAMERA_ORBITAL_SPEED 0.05f void CustomUpdateCamera(Camera *camera, float orbitSpeed) { float cameraOrbitalSpeed = CAMERA_ORBITAL_SPEED*GetFrameTime(); Matrix rotation = MatrixRotate(GetCameraUp(camera), cameraOrbitalSpeed); @@ -659,7 +659,9 @@ void toPx(Point *points, Glyph* glyphs, int size, PlotArgs args) { if (args.log_c) { cmap = safe_log10(cmap); } - cmap = (cmap - c_min)/(c_max - c_min); + if (c_min != c_max) { + cmap = (cmap - c_min)/(c_max - c_min); + } Color c = rgb(cmap); glyphs[i] = (Glyph){ px, @@ -707,6 +709,32 @@ void copy_hypers_to_clipboard(Env *env, char* buffer, int ary_idx) { SetClipboardText(start); } +void compute_constellation(Dataset *data, int* env_idxs, float* env_dists, + float env_perf, float perf_threshold, Vector2 tsne, float tsne_thresh) { + for (int i=0; in; i++) { + Env* env = &data->envs[i]; + Hyper* perf = get_hyper(data, env->key, "environment/perf"); + Hyper* tsne1 = get_hyper(data, env->key, "tsne1"); + Hyper* tsne2 = get_hyper(data, env->key, "tsne2"); + for (int j=0; jn; j++) { + if 
(perf->ary[j] < perf_threshold) { + continue; + } + float t1_dist = tsne1->ary[j] - tsne.x; + float t2_dist = tsne2->ary[j] - tsne.y; + float tsne_dist = t1_dist*t1_dist + t2_dist*t2_dist; + if (tsne_dist > tsne_thresh) { + continue; + } + if (tsne_dist < env_dists[i]) { + env_dists[i] = tsne_dist; + env_idxs[i] = j; + } + } + } +} + + int main(void) { FILE *file = fopen("pufferlib/ocean/constellation/all_cache.json", "r"); if (!file) { @@ -986,6 +1014,50 @@ int main(void) { Vector2 focus = {0, 0}; + // Find best hypers + float tsne_thresh = 100.0f; + memset(best_n, 0, sizeof(int)*4); + memset(best_srci, 0, sizeof(int)*4); + for (int env_i=0; env_ikey, "environment/perf"); + Hyper* src_tsne1 = get_hyper(&data, src->key, "tsne1"); + Hyper* src_tsne2 = get_hyper(&data, src->key, "tsne2"); + for (int i=0; in; i++) { + float perfi = src_perf->ary[i]; + Vector2 tsnei = (Vector2){src_tsne1->ary[i], src_tsne2->ary[i]}; + for (int ki=0; ki<4; ki++) { + if (perfi < perf_thresholds[ki]) { + continue; + } + for (int kj=0; kj best_n[ki]) { + best_n[ki] = temp_n; + best_srci[ki] = env_i; + for (int kj=0; kjkey, "environment/perf"); - Hyper* src_tsne1 = get_hyper(&data, src->key, "tsne1"); - Hyper* src_tsne2 = get_hyper(&data, src->key, "tsne2"); - for (int i=0; in; i++) { - float perfi = src_perf->ary[i]; - if (perfi < perf_thresholds[0]) { - continue; - } - float t1i = src_tsne1->ary[i]; - float t2i = src_tsne2->ary[i]; - for (int ki=0; ki<4; ki++) { - for (int kj=0; kjkey, "environment/perf"); - Hyper* dst_tsne1 = get_hyper(&data, dst->key, "tsne1"); - Hyper* dst_tsne2 = get_hyper(&data, dst->key, "tsne2"); - for (int j=0; jn; j++) { - float perfj = dst_perf->ary[j]; - if (perfj < perf_thresholds[0]) { - continue; - } - float t1j = dst_tsne1->ary[j]; - float t2j = dst_tsne2->ary[j]; - float t1_dist = t1i - t1j; - float t2_dist = t2i - t2j; - float tsne_dist = t1_dist*t1_dist + t2_dist*t2_dist; - if (tsne_dist > tsne_thresh) { - continue; - } - for (int thresh_idx=0; 
thresh_idx<4; thresh_idx++) { - float perf_thresh = perf_thresholds[thresh_idx]; - if (perfi < perf_thresh || perfj < perf_thresh) { - break; - } - if (temp_idx[thresh_idx][env_j] == -1 || tsne_dist < temp_dist[thresh_idx][env_j]) { - temp_idx[thresh_idx][env_j] = j; - temp_dist[thresh_idx][env_j] = tsne_dist; - } - } - } - } - - for (int ki=0; ki<4; ki++) { - int temp_n = 0; - for (int kj=0; kj best_n[ki]) { - best_n[ki] = temp_n; - best_srci[ki] = env_i; - for (int kj=0; kjary[src_idx]; - float yi = y->ary[src_idx]; - float zi = z->ary[src_idx]; - - xi = (args1.log_x) ? safe_log10(xi) : xi; - yi = (args1.log_y) ? safe_log10(yi) : yi; - zi = (args1.log_z) ? safe_log10(zi) : zi; - - Vector3 src_point = (Vector3){ - (xi - x_min)/(x_max - x_min), - (yi - y_min)/(y_max - y_min), - (zi - z_min)/(z_max - z_min) - }; + if (fig1_env_idx == 0) { + float x_min = (args1.log_x) ? safe_log10(args1.x_min) : args1.x_min; + float x_max = (args1.log_x) ? safe_log10(args1.x_max) : args1.x_max; + float y_min = (args1.log_y) ? safe_log10(args1.y_min) : args1.y_min; + float y_max = (args1.log_y) ? safe_log10(args1.y_max) : args1.y_max; + float z_min = (args1.log_z) ? safe_log10(args1.z_min) : args1.z_min; + float z_max = (args1.log_z) ? safe_log10(args1.z_max) : args1.z_max; + for (int k=0; k<4; k++) { + int bsi = best_srci[k]; + char* src_env = data.envs[bsi].key; + int src_idx = best_idx[k][bsi]; + x = get_hyper(&data, src_env, hyper_key[fig1_x_idx]); + y = get_hyper(&data, src_env, hyper_key[fig1_y_idx]); + z = get_hyper(&data, src_env, hyper_key[fig1_z_idx]); + float xi = x->ary[src_idx]; + float yi = y->ary[src_idx]; + float zi = z->ary[src_idx]; + + xi = (args1.log_x) ? safe_log10(xi) : xi; + yi = (args1.log_y) ? safe_log10(yi) : yi; + zi = (args1.log_z) ? 
safe_log10(zi) : zi; + + Vector3 src_point = (Vector3){ + (xi - x_min)/(x_max - x_min), + (yi - y_min)/(y_max - y_min), + (zi - z_min)/(z_max - z_min) + }; - Vector2 screen_i = GetWorldToScreenEx(src_point, camera, 960, 520); + Vector2 screen_i = GetWorldToScreenEx(src_point, camera, 960, 520); - for (int i=0; iary[bdi]; + float yj = y->ary[bdi]; + float zj = z->ary[bdi]; + + xj = (args1.log_x) ? safe_log10(xj) : xj; + yj = (args1.log_y) ? safe_log10(yj) : yj; + zj = (args1.log_z) ? safe_log10(zj) : zj; + + Vector3 dst_point = (Vector3){ + (xj - x_min)/(x_max - x_min), + (yj - y_min)/(y_max - y_min), + (zj - z_min)/(z_max - z_min) + }; + Vector2 screen_j = GetWorldToScreenEx(dst_point, camera, 960, 520); + DrawLineEx( + (Vector2){screen_i.x, screen_i.y}, + (Vector2){screen_j.x, screen_j.y}, + 2, + CONSTELLATION + ); } - char* dst_env = data.envs[i].key; - x = get_hyper(&data, dst_env, hyper_key[fig1_x_idx]); - y = get_hyper(&data, dst_env, hyper_key[fig1_y_idx]); - z = get_hyper(&data, dst_env, hyper_key[fig1_z_idx]); - float xj = x->ary[bdi]; - float yj = y->ary[bdi]; - float zj = z->ary[bdi]; - - xj = (args1.log_x) ? safe_log10(xj) : xj; - yj = (args1.log_y) ? safe_log10(yj) : yj; - zj = (args1.log_z) ? 
safe_log10(zj) : zj; - - Vector3 dst_point = (Vector3){ - (xj - x_min)/(x_max - x_min), - (yj - y_min)/(y_max - y_min), - (zj - z_min)/(z_max - z_min) - }; - Vector2 screen_j = GetWorldToScreenEx(dst_point, camera, 960, 520); - DrawLineEx( - (Vector2){screen_i.x, screen_i.y}, - (Vector2){screen_j.x, screen_j.y}, - 2, - CONSTELLATION - ); } } + /* + Rectangle bounds = {0, 0, fig1.texture.width, fig1.texture.height}; + int env_n = data.envs[0].n; + Point points[4*(env_n + 1)]; + Glyph glyphs[4*(env_n + 1)]; + for (int k=0; k<4; k++) { + int bsi = best_srci[k]; + char* src_env = data.envs[bsi].key; + int src_idx = best_idx[k][bsi]; + float xx = get_hyper(&data, src_env, hyper_key[fig1_x_idx])->ary[src_idx]; + float yy = get_hyper(&data, src_env, hyper_key[fig1_y_idx])->ary[src_idx]; + float zz = get_hyper(&data, src_env, hyper_key[fig1_z_idx])->ary[src_idx]; + float cc = get_hyper(&data, src_env, hyper_key[fig1_color_idx])->ary[src_idx]; + points[k*env_n] = (Point){.x = xx, .y = yy, .z = zz, .c = cc}; + for (int i=0; iary[i]; + float yj = get_hyper(&data, dst_env, hyper_key[fig1_y_idx])->ary[i]; + float zj = get_hyper(&data, dst_env, hyper_key[fig1_z_idx])->ary[i]; + float cj = get_hyper(&data, dst_env, hyper_key[fig1_color_idx])->ary[i]; + points[k*env_n + i] = (Point){.x = xj, .y = yj, .z = zj, .c = cj}; + } + } + toPx(points, glyphs, 4*(env_n + 1), args1); + for (int k=0; k<4; k++) { + Glyph src = glyphs[k*env_n]; + Vector2 src_point = (Vector2){src.x, src.y}; + if (!CheckCollisionPointRec(src_point, bounds)) { + continue; + } + for (int i=0; iary[ary_idx]; float score = get_hyper(&data, env_key, "environment/score")->ary[ary_idx]; float steps = get_hyper(&data, env_key, "agent_steps")->ary[ary_idx]; + float perf = get_hyper(&data, env_key, "environment/perf")->ary[ary_idx]; + float tsne1 = get_hyper(&data, env_key, "tsne1")->ary[ary_idx]; + float tsne2 = get_hyper(&data, env_key, "tsne2")->ary[ary_idx]; + Vector2 tsne = (Vector2){tsne1, tsne2}; if (tooltip.active) 
{ + /* + float idx[env->n]; + float dist[env->n]; + compute_constellation(&data, idx, dist, perf, perf, tsne, tsne_thresh); + for (int i=0; in; i++) { + if (idx[i] == -1) { + continue; + } + */ + char* text = TextFormat("%s\nscore = %f\ncost = %f\nsteps = %f", env_key, score, cost, steps); Vector2 text_size = MeasureTextEx(args1.font_small, text, args1.axis_tick_font_size, 0); DrawRectangle(tooltip.x, tooltip.y, text_size.x + 4, text_size.y + 4, PUFF_BACKGROUND); DrawCircle(tooltip.x, tooltip.y, 2, PUFF_CYAN); DrawTextEx(args1.font_small, text, (Vector2){tooltip.x + 2, tooltip.y + 2}, args1.axis_tick_font_size, 0, WHITE); - } //DrawFPS(GetScreenWidth() - 95, 10); EndDrawing(); From 740eee3ecb576803cef38f77934e74d55d8458d4 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 18 Oct 2025 12:34:58 +0000 Subject: [PATCH 069/188] latest --- pufferlib/ocean/constellation/constellation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index 1303a659e..c9800c3bf 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -1298,7 +1298,7 @@ int main(void) { (zi - z_min)/(z_max - z_min) }; - Vector2 screen_i = GetWorldToScreenEx(src_point, camera, 960, 520); + Vector2 screen_i = GetWorldToScreenEx(src_point, args1.camera, args1.width, args1.height); for (int i=0; i Date: Sat, 18 Oct 2025 18:09:39 +0000 Subject: [PATCH 070/188] UNSTABLE TESTING. 
DO NOT USE --- pufferlib/pufferl.py | 130 ++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 56 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index d251a1fa5..dc919154a 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -23,6 +23,7 @@ import psutil import torch +from torch import func import torch.distributed from torch.distributed.elastic.multiprocessing.errors import record import torch.utils.cpp_extension @@ -49,6 +50,10 @@ # Assume advantage kernel has been built if CUDA compiler is available ADVANTAGE_CUDA = shutil.which("nvcc") is not None +# DEBUG FLAG IS A BUG. FUCK THIS DO NOT NOT NOT ENABLE +#torch.autograd.set_detect_anomaly(True) +#torch._dynamo.config.capture_scalar_outputs = True + class PuffeRL: def __init__(self, config, vecenv, policy, logger=None, verbose=True): # Backend perf optimization @@ -304,7 +309,7 @@ def evaluate(self): profile.end() return self.stats - @record + def train(self): profile = self.profile epoch = self.epoch @@ -359,63 +364,14 @@ def train(self): lstm_c=None, ) - logits, newvalue = self.policy(mb_obs, state) - actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) - - profile('train_misc', epoch) - newlogprob = newlogprob.reshape(mb_logprobs.shape) - logratio = newlogprob - mb_logprobs - ratio = logratio.exp() - self.ratio[idx] = ratio.detach() + adv = advantages[idx] - with torch.no_grad(): - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfrac = ((ratio - 1.0).abs() > config['clip_coef']).float().mean() + _compiled_train(self.policy, self.optimizer, + mb_obs, mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, + mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, state, epoch, idx, + config, clip_coef, vf_clip, num_minibatches, mb + ) - adv = advantages[idx] - adv = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, - ratio, adv, config['gamma'], 
config['gae_lambda'], - config['vtrace_rho_clip'], config['vtrace_c_clip']) - adv = mb_advantages - adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) - - # Losses - pg_loss1 = -adv * ratio - pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - newvalue = newvalue.view(mb_returns.shape) - v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) - v_loss_unclipped = (newvalue - mb_returns) ** 2 - v_loss_clipped = (v_clipped - mb_returns) ** 2 - v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() - - entropy_loss = entropy.mean() - - loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss - self.amp_context.__enter__() # TODO: AMP needs some debugging - - # This breaks vloss clipping? - self.values[idx] = newvalue.detach().float() - - # Logging - profile('train_misc', epoch) - losses['policy_loss'] += pg_loss.item() / num_minibatches - losses['value_loss'] += v_loss.item() / num_minibatches - losses['entropy'] += entropy_loss.item() / num_minibatches - losses['old_approx_kl'] += old_approx_kl.item() / num_minibatches - losses['approx_kl'] += approx_kl.item() / num_minibatches - losses['clipfrac'] += clipfrac.item() / num_minibatches - losses['importance'] += ratio.mean().item() / num_minibatches - - # Learn on accumulated minibatches - profile('learn', epoch) - loss.backward() - if (mb + 1) % self.accumulate_minibatches == 0: - torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) - self.optimizer.step() - self.optimizer.zero_grad() # Reprioritize experience profile('train_misc', epoch) @@ -742,6 +698,68 @@ def clear(self): prof['buffer'] = prof['delta'] prof['delta'] = 0 +def compute_loss(params_and_buffers, policy, mb_obs, + mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, + mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, state, epoch, idx, + config, clip_coef, vf_clip, 
num_minibatches, mb): + logits, newvalue = func.functional_call(policy, params_and_buffers, mb_obs) + + actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) + + #profile('train_misc', epoch) + newlogprob = newlogprob.reshape(mb_logprobs.shape) + logratio = newlogprob - mb_logprobs + ratio = logratio.exp() + #self.ratio[idx] = ratio.detach() + + with torch.no_grad(): + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfrac = ((ratio - 1.0).abs() > config['clip_coef']).float().mean() + + adv_new = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, + ratio, adv, config['gamma'], config['gae_lambda'], + config['vtrace_rho_clip'], config['vtrace_c_clip']) + adv_new = mb_prio * (adv_new - adv_new.mean()) / (adv_new.std() + 1e-8) + + # Losses + pg_loss1 = -adv_new * ratio + pg_loss2 = -adv_new * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + newvalue = newvalue.view(mb_returns.shape) + v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) + v_loss_unclipped = (newvalue - mb_returns) ** 2 + v_loss_clipped = (v_clipped - mb_returns) ** 2 + v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() + + entropy_loss = entropy.mean() + + loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss + return loss + +@torch.compile(fullgraph=True) +def _compiled_train(policy, optimizer, mb_obs, mb_actions, mb_logprobs, mb_rewards, + mb_terminals, mb_truncations, mb_ratio, mb_values, mb_returns, mb_advantages, + mb_prio, adv, state, epoch, idx, config, clip_coef, vf_clip, num_minibatches, mb): + + lr = optimizer.param_groups[0]['lr'] + buffers = dict(policy.named_buffers()) + param_names = [k for k, v in policy.named_parameters() if v.requires_grad] + params = [v for k, v in policy.named_parameters() if v.requires_grad] + params_dict = dict(zip(param_names, params)) + params_and_buffers = {**buffers, 
**params_dict} + + grad_fn = func.grad(compute_loss, has_aux=False) + grads = grad_fn(params_and_buffers, policy, mb_obs, + mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, + mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, state, epoch, idx, + config, clip_coef, vf_clip, num_minibatches, mb) + + for name, param in zip(param_names, params): + if name in grads: + param.data.sub_(lr * grads[name]) + class Utilization(Thread): def __init__(self, delay=1, maxlen=20): super().__init__() From f168bd608ce2713bca42c80d298e7d39678c393c Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 18 Oct 2025 20:39:23 +0000 Subject: [PATCH 071/188] BROKEN- DO NOT USE --- pufferlib/config/ocean/breakout.ini | 62 +++++++--- pufferlib/models.py | 120 +++++++++++++++--- pufferlib/pufferl.py | 185 ++++++++++++++++------------ 3 files changed, 255 insertions(+), 112 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index a4b3698a6..5961d1719 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -2,7 +2,8 @@ package = ocean env_name = puffer_breakout policy_name = Policy -rnn_name = Recurrent +#rnn_name = Recurrent +rnn_name = None [vec] #num_envs = 1 @@ -11,8 +12,8 @@ num_envs = 4 # Experiment - fewer cores per env to avoid clogging [env] -num_envs = 64 -#num_envs = 512 +#num_envs = 64 +num_envs = 2048 frameskip = 1 width = 576 height = 330 @@ -30,29 +31,50 @@ paddle_speed = 620 continuous = 0 [policy] -hidden_size = 512 +hidden_size = 1024 +#hidden_size = 128 [train] -total_timesteps = 40_000_000 -adam_beta1 = 0.9389740236912132 -adam_beta2 = 0.9998225039929157 -adam_eps = 1.0267361590791064e-8 +total_timesteps = 90_000_000 +adam_beta1 = 0.8946507418260217 +adam_beta2 = 0.9 +adam_eps = 0.0001 batch_size = auto bptt_horizon = 64 -clip_coef = 0.01557913923814178 -ent_coef = 0.0031759371032913 -gae_lambda = 0.916681264452842 +clip_coef = 0.19696765958267629 +ent_coef = 
0.0005690816545012474 +gae_lambda = 0.747650023961198 gamma = 0.9997053654668936 -learning_rate = 0.012744235594115342 -max_grad_norm = 1.8013800046071862 -num_minibatches = 8 -minibatch_size = 4096 -prio_alpha = 0.9500430793857082 -prio_beta0 = 0.9436845548994959 -vf_clip_coef = 0.1 -vf_coef = 2.5994729835919834 +learning_rate = 0.044482546441415506 +max_grad_norm = 2.2356112188495723 +minibatch_size = 32768 +prio_alpha = 0.98967001208896 +prio_beta0 = 0.09999999999999998 +vf_clip_coef = 2.178492167689251 +vf_coef = 1.6832989594296321 vtrace_c_clip = 2.878171091654008 -vtrace_rho_clip = 1.3235791596831579 +vtrace_rho_clip = 0.7876748061547312 + +#total_timesteps = 40_000_000 +#adam_beta1 = 0.9389740236912132 +#adam_beta2 = 0.9998225039929157 +#adam_eps = 1.0267361590791064e-8 +#batch_size = auto +#bptt_horizon = 64 +#clip_coef = 0.01557913923814178 +#ent_coef = 0.0031759371032913 +#gae_lambda = 0.916681264452842 +#gamma = 0.9997053654668936 +#learning_rate = 0.012744235594115342 +#max_grad_norm = 1.8013800046071862 +#num_minibatches = 8 +#minibatch_size = 4096 +#prio_alpha = 0.9500430793857082 +#prio_beta0 = 0.9436845548994959 +#vf_clip_coef = 0.1 +#vf_coef = 2.5994729835919834 +#vtrace_c_clip = 2.878171091654008 +#vtrace_rho_clip = 1.3235791596831579 [sweep] downsample = 10 diff --git a/pufferlib/models.py b/pufferlib/models.py index b548a3e97..d13d0d5fc 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -97,6 +97,60 @@ def decode_actions(self, hidden): values = self.value(hidden) return logits, values +class Default(nn.Module): + '''Default PyTorch policy. Flattens obs and applies a linear layer. + + PufferLib is not a framework. It does not enforce a base class. + You can use any PyTorch policy that returns actions and values. + We structure our forward methods as encode_observations and decode_actions + to make it easier to wrap policies with LSTMs. You can do that and use + our LSTM wrapper or implement your own. 
To port an existing policy + for use with our LSTM wrapper, simply put everything from forward() before + the recurrent cell into encode_observations and put everything after + into decode_actions. + ''' + def __init__(self, env, hidden_size=128): + super().__init__() + self.hidden_size = hidden_size + self.is_multidiscrete = isinstance(env.single_action_space, + pufferlib.spaces.MultiDiscrete) + self.is_continuous = isinstance(env.single_action_space, + pufferlib.spaces.Box) + + num_obs = np.prod(env.single_observation_space.shape) + self.encoder = torch.nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), + nn.GELU(), + ) + num_atns = env.single_action_space.n + self.decoder = pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, num_atns), std=0.01) + self.value = pufferlib.pytorch.layer_init( + nn.Linear(hidden_size, 1), std=1) + + def forward_eval(self, observations, state=None): + hidden = self.encode_observations(observations, state=state) + logits, values = self.decode_actions(hidden) + return logits, values + + def forward(self, observations, state=None): + return self.forward_eval(observations, state) + + def encode_observations(self, observations, state=None): + '''Encodes a batch of observations into hidden states. Assumes + no time dimension (handled by LSTM wrappers).''' + batch_size = observations.shape[0] + observations = observations.view(batch_size, -1) + return self.encoder(observations.float()) + + def decode_actions(self, hidden): + '''Decodes a batch of hidden states into (multi)discrete actions. 
+ Assumes no time dimension (handled by LSTM wrappers).''' + logits = self.decoder(hidden) + values = self.value(hidden) + return logits, values + + class LSTMWrapper(nn.Module): def __init__(self, env, policy, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the @@ -112,6 +166,7 @@ def __init__(self, env, policy, hidden_size=128): self.hidden_size = hidden_size self.is_continuous = self.policy.is_continuous + ''' for name, param in self.named_parameters(): if 'layer_norm' in name: continue @@ -119,23 +174,22 @@ def __init__(self, env, policy, hidden_size=128): nn.init.constant_(param, 0) elif "weight" in name and param.ndim >= 2: nn.init.orthogonal_(param, 1.0) + ''' - self.lstm = nn.LSTM(input_size, hidden_size) + #self.lstm = nn.LSTM(input_size, hidden_size) self.cell = torch.nn.LSTMCell(input_size, hidden_size) - self.cell.weight_ih = self.lstm.weight_ih_l0 - self.cell.weight_hh = self.lstm.weight_hh_l0 - self.cell.bias_ih = self.lstm.bias_ih_l0 - self.cell.bias_hh = self.lstm.bias_hh_l0 + #self.cell.weight_ih = self.lstm.weight_ih_l0 + #self.cell.weight_hh = self.lstm.weight_hh_l0 + #self.cell.bias_ih = self.lstm.bias_ih_l0 + #self.cell.bias_hh = self.lstm.bias_hh_l0 #self.pre_layernorm = nn.LayerNorm(hidden_size) #self.post_layernorm = nn.LayerNorm(hidden_size) - def forward_eval(self, observations, state): + def forward(self, observations, h, c): '''Forward function for inference. 
3x faster than using LSTM directly''' - hidden = self.policy.encode_observations(observations, state=state) - h = state['lstm_h'] - c = state['lstm_c'] + hidden = self.policy.encode_observations(observations) # TODO: Don't break compile if h is not None: @@ -147,14 +201,12 @@ def forward_eval(self, observations, state): #hidden = self.pre_layernorm(hidden) hidden, c = self.cell(hidden, lstm_state) #hidden = self.post_layernorm(hidden) - state['hidden'] = hidden - state['lstm_h'] = hidden - state['lstm_c'] = c logits, values = self.policy.decode_actions(hidden) - return logits, values + return logits, values, hidden, c + ''' def forward(self, observations, state): - '''Forward function for training. Uses LSTM for fast time-batching''' + #Forward function for training. Uses LSTM for fast time-batching x = observations lstm_h = state['lstm_h'] lstm_c = state['lstm_c'] @@ -199,6 +251,46 @@ def forward(self, observations, state): state['lstm_h'] = lstm_h.detach() state['lstm_c'] = lstm_c.detach() return logits, values + ''' + +''' +class LSTMWrapper(nn.Module): + def __init__(self, env, policy, hidden_size=128): + super().__init__() + self.obs_shape = env.single_observation_space.shape + input_size = hidden_size + + self.policy = policy + self.input_size = input_size + self.hidden_size = hidden_size + self.is_continuous = self.policy.is_continuous + + self.cell = sLSTM(input_size, hidden_size, 4) + + #self.cell = torch.nn.LSTMCell(input_size, hidden_size) + #self.cell.weight_ih = self.lstm.weight_ih_l0 + #self.cell.weight_hh = self.lstm.weight_hh_l0 + #self.cell.bias_ih = self.lstm.bias_ih_l0 + #self.cell.bias_hh = self.lstm.bias_hh_l0 + + #self.pre_layernorm = nn.LayerNorm(hidden_size) + #self.post_layernorm = nn.LayerNorm(hidden_size) + + def forward(self, observations, c, n, h, m): + hidden = self.policy.encode_observations(observations) + + # TODO: Don't break compile + if h is not None: + assert h.shape[0] == c.shape[0] == observations.shape[0], 'LSTM state must be 
(h, c)' + lstm_state = (h, c) + else: + lstm_state = None + + hidden, c = self.cell(hidden, lstm_state) + logits, values = self.policy.decode_actions(hidden) + return logits, values, hidden, c +''' + class Convolutional(nn.Module): def __init__(self, env, *args, framestack, flat_size, diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index dc919154a..1a4533767 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -132,7 +132,7 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): self.policy = policy if config['compile']: self.policy = torch.compile(policy, mode=config['compile_mode']) - self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) + #self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) # Optimizer @@ -251,7 +251,11 @@ def evaluate(self): state['lstm_h'] = self.lstm_h[env_id.start] state['lstm_c'] = self.lstm_c[env_id.start] - logits, value = self.policy.forward_eval(o_device, state) + #h, c = state['lstm_h'], state['lstm_c'] + logits, value = self.policy(o_device) + #logits, value, h, c = self.policy(o_device, h, c) + #state['lstm_h'] = h + #state['lstm_c'] = c action, logprob, _ = pufferlib.pytorch.sample_logits(logits) r = torch.clamp(r, -1, 1) @@ -311,69 +315,19 @@ def evaluate(self): def train(self): - profile = self.profile epoch = self.epoch + profile = self.profile profile('train', epoch) - losses = defaultdict(float) - config = self.config - device = config['device'] - - b0 = config['prio_beta0'] - a = config['prio_alpha'] - clip_coef = config['clip_coef'] - vf_clip = config['vf_clip_coef'] - anneal_beta = b0 + (1 - b0)*a*self.epoch/self.total_epochs - self.ratio[:] = 1 - - num_minibatches = config['num_minibatches'] - for mb in range(num_minibatches): - profile('train_misc', epoch, nest=True) - self.amp_context.__enter__() - - shape = self.values.shape - 
advantages = torch.zeros(shape, device=device) - advantages = compute_puff_advantage(self.values, self.rewards, - self.terminals, self.ratio, advantages, config['gamma'], - config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) - - profile('train_copy', epoch) - adv = advantages.abs().sum(axis=1) - prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) - prio_probs = (prio_weights + 1e-6)/(prio_weights.sum() + 1e-6) - idx = torch.multinomial(prio_probs, - self.minibatch_segments, replacement=True) - mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta - mb_obs = self.observations[idx] - mb_actions = self.actions[idx] - mb_logprobs = self.logprobs[idx] - mb_rewards = self.rewards[idx] - mb_terminals = self.terminals[idx] - mb_truncations = self.truncations[idx] - mb_ratio = self.ratio[idx] - mb_values = self.values[idx] - mb_returns = advantages[idx] + mb_values - mb_advantages = advantages[idx] - - profile('train_forward', epoch) - if not config['use_rnn']: - mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) - - state = dict( - action=mb_actions, - lstm_h=None, - lstm_c=None, - ) - - adv = advantages[idx] - - _compiled_train(self.policy, self.optimizer, - mb_obs, mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, - mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, state, epoch, idx, - config, clip_coef, vf_clip, num_minibatches, mb - ) - + profile('train_learn', epoch, nest=True) + + _compiled_train(self.policy, self.optimizer, self.observations, self.actions, self.logprobs, self.rewards, + self.terminals, self.truncations, self.ratio, self.values, self.epoch, self.total_epochs, + self.minibatch_segments, self.segments, self.vecenv.single_observation_space.shape, + int(self.vecenv.single_action_space.n), self.config) + profile('train_misc', epoch) # Reprioritize experience + ''' profile('train_misc', epoch) if config['anneal_lr']: self.scheduler.step() @@ -383,19 +337,26 @@ def train(self): 
var_y = y_true.var() explained_var = torch.nan if var_y == 0 else (1 - (y_true - y_pred).var() / var_y).item() losses['explained_variance'] = explained_var + ''' profile.end() + + config = self.config + + logs = None self.epoch += 1 done_training = self.global_step >= config['total_timesteps'] if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() - self.losses = losses + #self.losses = losses self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step profile.clear() + if self.epoch == 1: + profile.reset() if self.epoch % config['checkpoint_interval'] == 0 or done_training: self.save_checkpoint() @@ -654,10 +615,13 @@ def dist_mean(value, device): class Profile: def __init__(self, frequency=5): - self.profiles = defaultdict(lambda: defaultdict(float)) + self.reset() self.frequency = frequency self.stack = [] + def reset(self): + self.profiles = defaultdict(lambda: defaultdict(float)) + def __iter__(self): return iter(self.profiles.items()) @@ -700,10 +664,32 @@ def clear(self): def compute_loss(params_and_buffers, policy, mb_obs, mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, - mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, state, epoch, idx, - config, clip_coef, vf_clip, num_minibatches, mb): + mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, + config, clip_coef, vf_clip, num_minibatches, mb, horizon, num_atns, segments): + segments = int(segments) + num_atns = int(num_atns) + #logits = torch.empty(segments, horizon, num_atns, device=config['device']) + #newvalue = torch.empty(segments, horizon, 1, device=config['device']) + logits = [] + newvalue = [] + #h = None + #c = None + h = torch.zeros(segments, 128, device=config['device']) + c = torch.zeros(segments, 128, device=config['device']) + + #for t in range(horizon): + # #l, n, h, c = func.functional_call(policy, 
params_and_buffers, (mb_obs[:, t], h, c)) + # l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], None, None)) + # #logits[:, t] = l + # #newvalue[:, t] = n + # logits.append(l) + # newvalue.append(n) + logits, newvalue = func.functional_call(policy, params_and_buffers, mb_obs) + #logits = torch.cat(logits, dim=0) + #newvalue = torch.stack(newvalue, dim=1) + actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) #profile('train_misc', epoch) @@ -739,26 +725,69 @@ def compute_loss(params_and_buffers, policy, mb_obs, return loss @torch.compile(fullgraph=True) -def _compiled_train(policy, optimizer, mb_obs, mb_actions, mb_logprobs, mb_rewards, - mb_terminals, mb_truncations, mb_ratio, mb_values, mb_returns, mb_advantages, - mb_prio, adv, state, epoch, idx, config, clip_coef, vf_clip, num_minibatches, mb): +def _compiled_train(policy, optimizer, observations, actions, logprobs, rewards, + terminals, truncations, ratio, values, epoch, total_epochs, + minibatch_segments, segments, obs_shape, atn_shape, config): + + config['device'] + horizon = config['bptt_horizon'] + device = config['device'] + b0 = config['prio_beta0'] + a = config['prio_alpha'] + clip_coef = config['clip_coef'] + vf_clip = config['vf_clip_coef'] + anneal_beta = b0 + (1 - b0)*a*epoch/total_epochs + + ratio = torch.ones_like(values) + + num_minibatches = config['num_minibatches'] lr = optimizer.param_groups[0]['lr'] - buffers = dict(policy.named_buffers()) + buffers = {}#dict(policy.named_buffers()) param_names = [k for k, v in policy.named_parameters() if v.requires_grad] params = [v for k, v in policy.named_parameters() if v.requires_grad] params_dict = dict(zip(param_names, params)) params_and_buffers = {**buffers, **params_dict} - grad_fn = func.grad(compute_loss, has_aux=False) - grads = grad_fn(params_and_buffers, policy, mb_obs, - mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, - mb_ratio, mb_values, mb_returns, 
mb_advantages, mb_prio, adv, state, epoch, idx, - config, clip_coef, vf_clip, num_minibatches, mb) - - for name, param in zip(param_names, params): - if name in grads: - param.data.sub_(lr * grads[name]) + for mb in range(num_minibatches): + shape = values.shape + advantages = torch.zeros(shape, device=device) + advantages = compute_puff_advantage(values, rewards, + terminals, ratio, advantages, config['gamma'], + config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) + + adv = advantages.abs().sum(axis=1) + prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) + prio_probs = (prio_weights + 1e-6)/(prio_weights.sum() + 1e-6) + idx = torch.multinomial(prio_probs, + minibatch_segments, replacement=True) + mb_prio = (segments*prio_probs[idx, None])**-anneal_beta + mb_obs = observations[idx] + mb_actions = actions[idx] + mb_logprobs = logprobs[idx] + mb_rewards = rewards[idx] + mb_terminals = terminals[idx] + mb_truncations = truncations[idx] + mb_ratio = ratio[idx] + mb_values = values[idx] + mb_returns = advantages[idx] + mb_values + mb_advantages = advantages[idx] + + if not config['use_rnn']: + mb_obs = mb_obs.reshape(-1, *obs_shape) + + + adv = advantages[idx] + + grad_fn = func.grad(compute_loss, has_aux=False) + grads = grad_fn(params_and_buffers, policy, mb_obs, + mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, + mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, + config, clip_coef, vf_clip, num_minibatches, mb, horizon, atn_shape, minibatch_segments) + + for name, param in zip(param_names, params): + if name in grads: + param.data.sub_(lr * grads[name]) class Utilization(Thread): def __init__(self, delay=1, maxlen=20): From 9d23693632b2d844f9184a6fe1f54243f97699c9 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sun, 19 Oct 2025 01:36:18 +0000 Subject: [PATCH 072/188] Initial cuda bind --- pufferlib/extensions/cuda/squared.cu | 141 +++++++++++++ pufferlib/extensions/cuda/squared.hpp | 47 +++++ 
pufferlib/extensions/cuda/squared_torch.cu | 234 +++++++++++++++++++++ setup.py | 6 +- test_squared_torch.py | 34 +++ 5 files changed, 460 insertions(+), 2 deletions(-) create mode 100644 pufferlib/extensions/cuda/squared.cu create mode 100644 pufferlib/extensions/cuda/squared.hpp create mode 100644 pufferlib/extensions/cuda/squared_torch.cu create mode 100644 test_squared_torch.py diff --git a/pufferlib/extensions/cuda/squared.cu b/pufferlib/extensions/cuda/squared.cu new file mode 100644 index 000000000..9f1371cd6 --- /dev/null +++ b/pufferlib/extensions/cuda/squared.cu @@ -0,0 +1,141 @@ +//#include "squared.hpp" +#include +#include +#include +#include +#include "squared.hpp" + + +// Device: Reset environment +__device__ void cuda_reset(Squared* env, curandState* rng) { + int tiles = env->size * env->size; + int center = env->size / 2 * env->size + env->size / 2; + + // Clear grid + for (int i = 0; i < tiles; i++) { + env->observations[i] = EMPTY; + } + + // Place agent at center + env->observations[center] = AGENT; + env->r = env->size / 2; + env->c = env->size / 2; + env->tick = 0; + + // Place target randomly (not on agent) + int target_idx; + do { + target_idx = curand(rng) % tiles; + } while (target_idx == center); + + env->observations[target_idx] = TARGET; +} + +// Device: Step environment +__device__ void cuda_step(Squared* env) { + env->tick += 1; + int action = env->actions[0]; + env->terminals[0] = 0; + env->rewards[0] = 0.0f; + + int pos = env->r * env->size + env->c; + env->observations[pos] = EMPTY; // Clear old agent pos + + // Move agent + if (action == DOWN) { + env->r += 1; + } else if (action == UP) { + env->r -= 1; + } else if (action == RIGHT) { + env->c += 1; + } else if (action == LEFT) { + env->c -= 1; + } + + pos = env->r * env->size + env->c; + + // Check bounds and timeout + if (env->r < 0 || env->c < 0 || env->r >= env->size || env->c >= env->size || + env->tick > 3 * env->size) { + env->terminals[0] = 1; + env->rewards[0] = -1.0f; + 
env->log.perf += 0; + env->log.score += -1.0f; + env->log.episode_return += -1.0f; + env->log.episode_length += env->tick; + env->log.n += 1; + cuda_reset(env, &env->rng); + return; + } + + // Check if reached target + if (env->observations[pos] == TARGET) { + env->terminals[0] = 1; + env->rewards[0] = 1.0f; + env->log.perf += 1; + env->log.score += 1.0f; + env->log.episode_return += 1.0f; + env->log.episode_length += env->tick; + env->log.n += 1; + cuda_reset(env, &env->rng); + return; + } + + // Place agent + env->observations[pos] = AGENT; +} + +// Kernel: Step all environments +__global__ void step_environments(Squared* envs, int num_envs) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_envs) return; + cuda_step(&envs[idx]); +} + +// Kernel: Reset specific environments +__global__ void reset_environments(Squared* envs, int* indices, int num_reset) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_reset) return; + int env_idx = indices[idx]; + cuda_reset(&envs[env_idx], &envs[env_idx].rng); +} + +// Kernel: Initialize all environments +__global__ void init_environments(Squared* envs, + unsigned char* obs_mem, + int* actions_mem, + float* rewards_mem, + unsigned char* terminals_mem, + int num_envs, + int grid_size) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_envs) return; + + Squared* env = &envs[idx]; + + // Initialize log + env->log.perf = 0; + env->log.score = 0; + env->log.episode_return = 0; + env->log.episode_length = 0; + env->log.n = 0; + + // Set pointers into memory pools + env->observations = obs_mem + idx * grid_size * grid_size; + env->actions = actions_mem + idx; + env->rewards = rewards_mem + idx; + env->terminals = terminals_mem + idx; + + env->size = grid_size; + env->tick = 0; + env->r = grid_size / 2; + env->c = grid_size / 2; + + // Initialize RNG + curand_init(clock64(), idx, 0, &env->rng); + + // Initial reset + cuda_reset(env, &env->rng); +} + + diff --git 
a/pufferlib/extensions/cuda/squared.hpp b/pufferlib/extensions/cuda/squared.hpp new file mode 100644 index 000000000..1aedd7270 --- /dev/null +++ b/pufferlib/extensions/cuda/squared.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + +static constexpr unsigned char NOOP = 0; +static constexpr unsigned char DOWN = 1; +static constexpr unsigned char UP = 2; +static constexpr unsigned char LEFT = 3; +static constexpr unsigned char RIGHT = 4; + +static constexpr unsigned char EMPTY = 0; +static constexpr unsigned char AGENT = 1; +static constexpr unsigned char TARGET = 2; + +struct Log { + float perf; + float score; + float episode_return; + float episode_length; + float n; +}; + +struct Squared { + Log log; + unsigned char* observations; + int* actions; + float* rewards; + unsigned char* terminals; + int size; + int tick; + int r, c; + curandState rng; +}; + +__global__ void init_environments( + Squared* envs, + unsigned char* obs_mem, + int* actions_mem, + float* rewards_mem, + unsigned char* terminals_mem, + int num_envs, + int grid_size +); + +__global__ void step_environments(Squared* envs, int num_envs); +__global__ void reset_environments(Squared* envs, int* indices, int num_reset); diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu new file mode 100644 index 000000000..a05f4ae22 --- /dev/null +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -0,0 +1,234 @@ +#include +#include + +#include +#include +#include + +static constexpr unsigned char NOOP = 0; +static constexpr unsigned char DOWN = 1; +static constexpr unsigned char UP = 2; +static constexpr unsigned char LEFT = 3; +static constexpr unsigned char RIGHT = 4; + +static constexpr unsigned char EMPTY = 0; +static constexpr unsigned char AGENT = 1; +static constexpr unsigned char TARGET = 2; + +struct Log { + float perf; + float score; + float episode_return; + float episode_length; + float n; +}; + +struct __align__(16) Squared { + curandState rng; + 
unsigned char* observations; + int* actions; + float* rewards; + unsigned char* terminals; + int size; + int tick; + int r, c; + Log log; + int padding[3]; +}; + + +// Device: Reset environment +__device__ void cuda_reset(Squared* env, curandState* rng) { + int tiles = env->size * env->size; + int center = env->size / 2 * env->size + env->size / 2; + + // Clear grid + for (int i = 0; i < tiles; i++) { + env->observations[i] = EMPTY; + } + + // Place agent at center + env->observations[center] = AGENT; + env->r = env->size / 2; + env->c = env->size / 2; + env->tick = 0; + + // Place target randomly (not on agent) + int target_idx; + do { + target_idx = curand(rng) % tiles; + } while (target_idx == center); + + env->observations[target_idx] = TARGET; +} + +// Device: Step environment +__device__ void cuda_step(Squared* env) { + env->tick += 1; + int action = env->actions[0]; + env->terminals[0] = 0; + /* + env->rewards[0] = 0.0f; + + int pos = env->r * env->size + env->c; + env->observations[pos] = EMPTY; // Clear old agent pos + + // Move agent + if (action == DOWN) { + env->r += 1; + } else if (action == UP) { + env->r -= 1; + } else if (action == RIGHT) { + env->c += 1; + } else if (action == LEFT) { + env->c -= 1; + } + + pos = env->r * env->size + env->c; + + // Check bounds and timeout + if (env->r < 0 || env->c < 0 || env->r >= env->size || env->c >= env->size || + env->tick > 3 * env->size) { + env->terminals[0] = 1; + env->rewards[0] = -1.0f; + env->log.perf += 0; + env->log.score += -1.0f; + env->log.episode_return += -1.0f; + env->log.episode_length += env->tick; + env->log.n += 1; + cuda_reset(env, &env->rng); + return; + } + + // Check if reached target + if (env->observations[pos] == TARGET) { + env->terminals[0] = 1; + env->rewards[0] = 1.0f; + env->log.perf += 1; + env->log.score += 1.0f; + env->log.episode_return += 1.0f; + env->log.episode_length += env->tick; + env->log.n += 1; + cuda_reset(env, &env->rng); + return; + } + + // Place agent + 
env->observations[pos] = AGENT; + */ +} + +// Kernel: Step all environments +__global__ void step_environments(Squared* envs, int num_envs) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_envs) return; + cuda_step(&envs[idx]); +} + +// Kernel: Reset specific environments +__global__ void reset_environments(Squared* envs, int* indices, int num_reset) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_reset) return; + int env_idx = indices[idx]; + cuda_reset(&envs[env_idx], &envs[env_idx].rng); +} + +// Kernel: Initialize all environments +__global__ void init_environments(Squared* envs, + unsigned char* obs_mem, + int* actions_mem, + float* rewards_mem, + unsigned char* terminals_mem, + int num_envs, + int grid_size) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_envs) return; + + Squared* env = &envs[idx]; + + // Initialize log + env->log.perf = 0; + env->log.score = 0; + env->log.episode_return = 0; + env->log.episode_length = 0; + env->log.n = 0; + + // Set pointers into memory pools + env->observations = obs_mem + idx * grid_size * grid_size; + env->actions = actions_mem + idx; + env->rewards = rewards_mem + idx; + env->terminals = terminals_mem + idx; + + env->size = grid_size; + env->tick = 0; + env->r = grid_size / 2; + env->c = grid_size / 2; + + // Initialize RNG + curand_init(clock64(), idx, 0, &env->rng); + + // Initial reset + cuda_reset(env, &env->rng); +} + + +inline dim3 make_grid(int n) { + return dim3((n + 255) / 256); +} + +std::tuple +create_squared_environments(int64_t num_envs, int64_t grid_size, torch::Tensor dummy) { + auto device = dummy.device(); + TORCH_CHECK(device.type() == at::kCUDA, "Dummy tensor must be on CUDA device"); + + auto envs_tensor = torch::empty({static_cast(num_envs * sizeof(Squared))}, torch::kUInt8).to(device); + auto obs = torch::zeros({num_envs, grid_size, grid_size}, torch::kUInt8).to(device); + auto actions = torch::zeros({num_envs}, 
torch::kInt32).to(device); + auto rewards = torch::zeros({num_envs}, torch::kFloat32).to(device); + auto terminals = torch::zeros({num_envs}, torch::kUInt8).to(device); + + Squared* envs = reinterpret_cast(envs_tensor.data_ptr()); + + init_environments<<>>( + envs, + obs.data_ptr(), + actions.data_ptr(), + rewards.data_ptr(), + terminals.data_ptr(), + num_envs, + grid_size + ); + cudaDeviceSynchronize(); + + return std::make_tuple(envs_tensor, obs, actions, rewards, terminals); +} + +void step_environments_cuda(torch::Tensor envs_tensor) { + Squared* envs = reinterpret_cast(envs_tensor.data_ptr()); + // YOU HARDCODED THIS HERE + int num_envs = 2048; + + step_environments<<>>(envs, num_envs); + cudaDeviceSynchronize(); +} + +void reset_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_tensor) { + Squared* envs = reinterpret_cast(envs_tensor.data_ptr()); + auto indices = indices_tensor.data_ptr(); + int num_reset = indices_tensor.size(0); + + reset_environments<<>>(envs, indices, num_reset); + cudaDeviceSynchronize(); +} + +TORCH_LIBRARY(squared, m) { + m.def("create_squared_environments(int num_envs, int grid_size, Tensor dummy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"); + m.def("step_environments(Tensor envs) -> ()"); + m.def("reset_environments(Tensor envs, Tensor indices) -> ()"); +} + +TORCH_LIBRARY_IMPL(squared, CUDA, m) { + m.impl("create_squared_environments", &create_squared_environments); + m.impl("step_environments", &step_environments_cuda); + m.impl("reset_environments", &reset_environments_cuda); +} diff --git a/setup.py b/setup.py index 5514d8082..b7c63dae1 100644 --- a/setup.py +++ b/setup.py @@ -167,14 +167,15 @@ def run(self): self.run_command('build_torch') self.run_command('build_c') +extnames = ["pufferlib._C", "squared_torch._C"] class CBuildExt(build_ext): def run(self, *args, **kwargs): - self.extensions = [e for e in self.extensions if e.name != "pufferlib._C"] + self.extensions = [e for e in self.extensions if e.name not 
in extnames] super().run(*args, **kwargs) class TorchBuildExt(cpp_extension.BuildExtension): def run(self): - self.extensions = [e for e in self.extensions if e.name == "pufferlib._C"] + self.extensions = [e for e in self.extensions if e.name in extnames] super().run() RAYLIB_A = f'{RAYLIB_NAME}/lib/libraylib.a' @@ -218,6 +219,7 @@ def run(self): if shutil.which("nvcc"): extension = CUDAExtension torch_sources.append("pufferlib/extensions/cuda/pufferlib.cu") + torch_sources.append("pufferlib/extensions/cuda/squared_torch.cu") else: extension = CppExtension diff --git a/test_squared_torch.py b/test_squared_torch.py new file mode 100644 index 000000000..7b50e4473 --- /dev/null +++ b/test_squared_torch.py @@ -0,0 +1,34 @@ +import torch +import torch.utils.cpp_extension +try: + from pufferlib import _C +except ImportError: + raise ImportError('Failed to import C/CUDA advantage kernel. If you have non-default PyTorch, try installing with --no-build-isolation') + + +if __name__ == '__main__': + # THIS IS HARDCODED IN CUDA. 
DO NOT CHANGE + num_envs = 2048 + + steps = 10000 + grid_size = 9 + dummy = torch.zeros(5).cuda() + indices = torch.arange(num_envs).cuda().int() + envs, obs, actions, rewards, terminals = torch.ops.squared.create_squared_environments(num_envs, grid_size, dummy) + atns = torch.randint(0, 5, (num_envs,)).cuda() + actions[:] = atns + + torch.ops.squared.reset_environments(envs, indices) + + import time + start = time.time() + torch.cuda.synchronize() + for i in range(steps): + torch.ops.squared.step_environments(envs) + + torch.cuda.synchronize() + end = time.time() + + print('Steps/sec:', num_envs * steps / (end - start)) + + From 62d361f5463e7ba6d288f95c7d9128768c8e9b70 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 20 Oct 2025 18:52:28 +0000 Subject: [PATCH 073/188] Build flags --- scripts/build_ocean.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/build_ocean.sh b/scripts/build_ocean.sh index e854b1f74..ecb0dabe6 100755 --- a/scripts/build_ocean.sh +++ b/scripts/build_ocean.sh @@ -72,10 +72,16 @@ FLAGS=( -I./pufferlib/extensions "$SRC_DIR/cJSON.c" "$SRC_DIR/$ENV.c" -o "$ENV" $LINK_ARCHIVES + -lGL -lm -lpthread -ferror-limit=3 -DPLATFORM_DESKTOP + # Bite me + -Werror=incompatible-pointer-types + -Wno-error=incompatible-pointer-types-discards-qualifiers + -Wno-incompatible-pointer-types-discards-qualifiers + -Wno-error=array-parameter ) From 345217463a715787ec75cc65fc73116c6ace7346 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 23 Oct 2025 00:32:55 +0000 Subject: [PATCH 074/188] comprehensive perf test --- compile_puffer.py | 195 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 compile_puffer.py diff --git a/compile_puffer.py b/compile_puffer.py new file mode 100644 index 000000000..21cb44946 --- /dev/null +++ b/compile_puffer.py @@ -0,0 +1,195 @@ +import torch +from torch import nn +from torch.utils.benchmark import Timer +from torch.utils.flop_counter import FlopCounterMode 
+from torch import func + +from torch.backends import cudnn +cudnn.benchmark = True +cudnn.deterministic = False +cudnn.benchmark_limit = 32 + +torch.set_float32_matmul_precision('high') +torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True + +class Default(nn.Module): + def __init__(self, input_size, hidden_size, output_size): + super().__init__() + self.hidden_size = hidden_size + self.encoder = torch.nn.Sequential( + nn.Linear(input_size, hidden_size), + nn.GELU(), + ) + self.decoder = nn.Linear(hidden_size, output_size) + self.value = nn.Linear(hidden_size, 1) + + def forward(self, observations): + hidden = self.encode_observations(observations) + logits, values = self.decode_actions(hidden) + return logits, values + + def encode_observations(self, observations, state=None): + batch_size = observations.shape[0] + observations = observations.view(batch_size, -1) + return self.encoder(observations) + + def decode_actions(self, hidden): + logits = self.decoder(hidden) + values = self.value(hidden) + return logits, values + + +class LSTMWrapper(nn.Module): + def __init__(self, policy, input_size, hidden_size, output_size): + super().__init__() + self.policy = policy + input_size = hidden_size + + self.input_size = input_size + self.hidden_size = hidden_size + + self.cell = torch.nn.LSTMCell(input_size, hidden_size) + + def forward(self, observations, h, c): + hidden = self.policy.encode_observations(observations) + hidden, c = self.cell(hidden, (h, c)) + logits, values = self.policy.decode_actions(hidden) + return logits, values, hidden, c + +def get_params_and_buffers(model): + buffers = dict(model.named_buffers()) + param_names = [k for k, v in model.named_parameters() if v.requires_grad] + params = [v for k, v in model.named_parameters() if v.requires_grad] + params_dict = dict(zip(param_names, params)) + return {**buffers, **params_dict} + + +@torch.compile(fullgraph=True, dynamic=False, mode='max-autotune') +def functional_forward(model, 
params_and_buffers, batch, h, c): + return func.functional_call(model, params_and_buffers, (batch, h, c)) + +def rollout(model, params_and_buffers, batch, h, c, seq): + all_logits = [] + all_values = [] + for i in range(seq): + logits, values, h, c = functional_forward(model, params_and_buffers, batch[i], h, c) + all_logits.append(logits) + all_values.append(values) + + logits = torch.stack(all_logits, dim=0) + values = torch.stack(all_values, dim=0) + + return logits, values + +@torch.compile(fullgraph=True, dynamic=False, mode='max-autotune') +def fast_rollout(model, batch, h, c, seq): + logits = torch.empty(seq, batch.shape[1], OUTPUT_SIZE, device=batch.device, dtype=batch.dtype) + values = torch.empty(seq, batch.shape[1], 1, device=batch.device, dtype=batch.dtype) + for i in range(seq): + l, v, h, c = model(batch[i], h, c) + logits[i] = l + values[i] = v + + return logits, values + +def evaluate(model, params_and_buffers, batch, h, c, seq): + with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.bfloat16): + return fast_rollout(model, batch, h, c, seq) + +def compute_loss(params_and_buffers, model, batch, h, c, seq): + logits, values = rollout(model, params_and_buffers, batch, h, c, seq) + loss = -torch.log(torch.softmax(logits, dim=-1)).mean() + (values**2).mean() + return loss + +grad_fn = torch.compile(func.grad(compute_loss), + fullgraph=True, dynamic=False, mode='max-autotune') + +@torch.compile(fullgraph=True, dynamic=False, mode='max-autotune') +def train(model, params_and_buffers, batch, h, c, loops, seq): + with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.bfloat16): + for _ in range(loops): + grads = grad_fn(params_and_buffers, model, batch, h, c, seq) + for name in grads: + params_and_buffers[name].sub_(0.01 * grads[name]) + + return params_and_buffers + +if __name__ == '__main__': + INPUT_SIZE = 128 + HIDDEN_SIZE = 128 + OUTPUT_SIZE = 4 + B = 256 + SEQ = 64 + LOOPS = 4 + dtype = torch.bfloat16 + + model = 
LSTMWrapper( + Default(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE), + INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE + ).cuda() + + # TODO: carefully test slowdown from this + params_and_buffers = get_params_and_buffers(model) + #model = torch.compile(model, mode='max-autotune', dynamic=False, fullgraph=True) + + # Create input batch + batch = torch.randn(SEQ, B, INPUT_SIZE).cuda().to(dtype) + + # Define a multi-step function to run multiple forwards in one compiled graph + # Manual FLOPs calculation + I = INPUT_SIZE + H = HIDDEN_SIZE + O = OUTPUT_SIZE + flops = B * (2*I*H + 16*H*H + 2*H*O + 2*H) + + h = torch.zeros(B, HIDDEN_SIZE).cuda().to(dtype) + c = torch.zeros(B, HIDDEN_SIZE).cuda().to(dtype) + + # Warmup + for _ in range(3): + _ = evaluate(model, params_and_buffers, batch, h, c, SEQ) + # Timing + timer = Timer( + stmt='evaluate(model, params_and_buffers, batch, h, c, SEQ)', + globals={ + 'evaluate': evaluate, + 'params_and_buffers': params_and_buffers, + 'model': model, + 'batch': batch, + 'h': h, + 'c': c, + 'SEQ': SEQ, + } + ) + output = timer.timeit(LOOPS) + + cost = output.mean / SEQ # Average time per forward pass (fixed from times[0] to mean) + FLOPS = flops / cost + perf_evaluate = f'FLOPS: {FLOPS / 1e12:.2f}T, SPS: {B/cost/1e6:.2f}M' + + # Warmup + for _ in range(1): + _ = train(model, params_and_buffers, batch, h, c, LOOPS, SEQ) + + # Timing + timer = Timer( + stmt='train(model, params_and_buffers, batch, h, c, LOOPS, SEQ)', + globals={ + 'train': train, + 'params_and_buffers': params_and_buffers, + 'model': model, + 'batch': batch, + 'h': h, + 'c': c, + 'LOOPS': LOOPS, + 'SEQ': SEQ, + } + ) + + output = timer.timeit(1) + cost = output.mean / SEQ / LOOPS # Average time per forward pass (fixed from times[0] to mean) + FLOPS = 3*flops / cost + perf_train = f'FLOPS: {FLOPS / 1e12:.2f}T, SPS: {B/cost/1e6:.2f}M' + + print(perf_evaluate) + print(perf_train) From 883a3d8d607e5236bb53c6e35019caa0c048bd49 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 23 Oct 2025 
21:57:25 +0000 Subject: [PATCH 075/188] Iniital puffer cpp --- compile_puffer.py | 11 +- profile_torch.py | 21 +- pufferlib/config/ocean/squared.ini | 6 + pufferlib/extensions/cuda/squared_torch.cu | 18 +- pufferlib/extensions/pufferlib.cpp | 440 +++++++++++++++++++-- pufferlib/models.py | 72 +++- pufferlib/pufferl.py | 352 ++++++++++------- setup.py | 4 +- 8 files changed, 710 insertions(+), 214 deletions(-) diff --git a/compile_puffer.py b/compile_puffer.py index 21cb44946..73d6a665c 100644 --- a/compile_puffer.py +++ b/compile_puffer.py @@ -64,7 +64,7 @@ def get_params_and_buffers(model): return {**buffers, **params_dict} -@torch.compile(fullgraph=True, dynamic=False, mode='max-autotune') +@torch.compile(fullgraph=True, dynamic=False, mode='reduce-overhead') def functional_forward(model, params_and_buffers, batch, h, c): return func.functional_call(model, params_and_buffers, (batch, h, c)) @@ -81,7 +81,7 @@ def rollout(model, params_and_buffers, batch, h, c, seq): return logits, values -@torch.compile(fullgraph=True, dynamic=False, mode='max-autotune') +@torch.compile(fullgraph=True, dynamic=False, mode='reduce-overhead') def fast_rollout(model, batch, h, c, seq): logits = torch.empty(seq, batch.shape[1], OUTPUT_SIZE, device=batch.device, dtype=batch.dtype) values = torch.empty(seq, batch.shape[1], 1, device=batch.device, dtype=batch.dtype) @@ -102,9 +102,10 @@ def compute_loss(params_and_buffers, model, batch, h, c, seq): return loss grad_fn = torch.compile(func.grad(compute_loss), - fullgraph=True, dynamic=False, mode='max-autotune') + fullgraph=True, dynamic=False, mode='reduce-overhead') + +#grad_fn = func.grad(compute_loss) -@torch.compile(fullgraph=True, dynamic=False, mode='max-autotune') def train(model, params_and_buffers, batch, h, c, loops, seq): with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.bfloat16): for _ in range(loops): @@ -130,7 +131,7 @@ def train(model, params_and_buffers, batch, h, c, loops, seq): # TODO: carefully test 
slowdown from this params_and_buffers = get_params_and_buffers(model) - #model = torch.compile(model, mode='max-autotune', dynamic=False, fullgraph=True) + #model = torch.compile(model, mode='reduce-overhead', dynamic=False, fullgraph=True) # Create input batch batch = torch.randn(SEQ, B, INPUT_SIZE).cuda().to(dtype) diff --git a/profile_torch.py b/profile_torch.py index e144a1127..656e7447a 100644 --- a/profile_torch.py +++ b/profile_torch.py @@ -11,20 +11,21 @@ torch.set_float32_matmul_precision('high') torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = True -INPUT_SIZE = 16 -HIDDEN_SIZE = 128 -OUTPUT_SIZE = 16 -B = 2048 +INPUT_SIZE = 128 +HIDDEN_SIZE1 = 128 +HIDDEN_SIZE2 = 512 +OUTPUT_SIZE = 128 +B = 8192 dtype = torch.bfloat16 inner_loops = 100 # Number of inner iterations to amortize overhead # Define the model with explicit Kaiming uniform initialization to match JAX model = torch.nn.Sequential( - torch.nn.Linear(INPUT_SIZE, HIDDEN_SIZE), + torch.nn.Linear(INPUT_SIZE, HIDDEN_SIZE1), torch.nn.ReLU(), - torch.nn.Linear(HIDDEN_SIZE, HIDDEN_SIZE), + torch.nn.Linear(HIDDEN_SIZE1, HIDDEN_SIZE2), torch.nn.ReLU(), - torch.nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE), + torch.nn.Linear(HIDDEN_SIZE2, OUTPUT_SIZE), ).cuda().to(dtype) # Create input batch @@ -43,9 +44,9 @@ def multi_step(model, batch, inner_loops): # Manual FLOPs calculation to match JAX (ignores bias adds and ReLUs as negligible) flops = ( - 2 * B * INPUT_SIZE * HIDDEN_SIZE + - 2 * B * HIDDEN_SIZE * HIDDEN_SIZE + - 2 * B * HIDDEN_SIZE * OUTPUT_SIZE + 2 * B * INPUT_SIZE * HIDDEN_SIZE1 + + 2 * B * HIDDEN_SIZE1 * HIDDEN_SIZE2 + + 2 * B * HIDDEN_SIZE2 * OUTPUT_SIZE ) # Warmup diff --git a/pufferlib/config/ocean/squared.ini b/pufferlib/config/ocean/squared.ini index ac9f69d0f..5629776a3 100644 --- a/pufferlib/config/ocean/squared.ini +++ b/pufferlib/config/ocean/squared.ini @@ -4,6 +4,12 @@ env_name = puffer_squared policy_name = Policy rnn_name = Recurrent +[vec] +num_envs = 1 + +[policy] 
+hidden_size = 128 + [env] num_envs = 4096 diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu index a05f4ae22..b32f131fd 100644 --- a/pufferlib/extensions/cuda/squared_torch.cu +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -1,10 +1,13 @@ #include +#include #include #include #include #include +namespace pufferlib { + static constexpr unsigned char NOOP = 0; static constexpr unsigned char DOWN = 1; static constexpr unsigned char UP = 2; @@ -203,11 +206,8 @@ create_squared_environments(int64_t num_envs, int64_t grid_size, torch::Tensor d return std::make_tuple(envs_tensor, obs, actions, rewards, terminals); } -void step_environments_cuda(torch::Tensor envs_tensor) { +void step_environments_cuda(torch::Tensor envs_tensor, int64_t num_envs) { Squared* envs = reinterpret_cast(envs_tensor.data_ptr()); - // YOU HARDCODED THIS HERE - int num_envs = 2048; - step_environments<<>>(envs, num_envs); cudaDeviceSynchronize(); } @@ -221,14 +221,4 @@ void reset_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_te cudaDeviceSynchronize(); } -TORCH_LIBRARY(squared, m) { - m.def("create_squared_environments(int num_envs, int grid_size, Tensor dummy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"); - m.def("step_environments(Tensor envs) -> ()"); - m.def("reset_environments(Tensor envs, Tensor indices) -> ()"); -} - -TORCH_LIBRARY_IMPL(squared, CUDA, m) { - m.impl("create_squared_environments", &create_squared_environments); - m.impl("step_environments", &step_environments_cuda); - m.impl("reset_environments", &reset_environments_cuda); } diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index a20d58bc2..0b4590f0f 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -1,27 +1,12 @@ -#include -#include -#include -#include -#include - -extern "C" { - /* Creates a dummy empty _C module that can be imported from Python. 
- The import from Python will load the .so consisting of this file - in this extension, so that the TORCH_LIBRARY static initializers - below are run. */ - PyObject* PyInit__C(void) - { - static struct PyModuleDef module_def = { - PyModuleDef_HEAD_INIT, - "_C", /* name of module */ - NULL, /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, - or -1 if the module keeps state in global variables. */ - NULL, /* methods */ - }; - return PyModule_Create(&module_def); - } -} +#include +#include + +#include +#include + +#include + +namespace py = pybind11; namespace pufferlib { @@ -92,4 +77,411 @@ TORCH_LIBRARY_IMPL(pufferlib, CPU, m) { m.impl("compute_puff_advantage", &compute_puff_advantage_cpu); } +std::tuple +create_squared_environments(int64_t num_envs, int64_t grid_size, torch::Tensor dummy); + +void step_environments_cuda(torch::Tensor envs_tensor, int64_t num_envs); + +void reset_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_tensor); + +void compute_puff_advantage_cuda( + torch::Tensor values, + torch::Tensor rewards, + torch::Tensor dones, + torch::Tensor importance, + torch::Tensor advantages, + double gamma, + double lambda, // Note: 'lambda' is fine as a param name in C++ + double rho_clip, + double c_clip +); + +/* +static torch::jit::Module g_policy; +void set_policy(torch::Tensor serialized_policy) { + std::string model_str(reinterpret_cast(serialized_policy.data_ptr()), serialized_policy.numel()); + std::istringstream model_stream(model_str); + g_policy = torch::jit::load(model_stream); + g_policy.eval(); +} +*/ + +class PolicyLSTM : public torch::nn::Module { +private: + int64_t input_size_; + int64_t hidden_size_; + int64_t num_atns_; + torch::nn::Sequential encoder{nullptr}; + torch::nn::Linear decoder{nullptr}; + torch::nn::Linear value{nullptr}; + torch::nn::LSTM lstm{nullptr}; + torch::nn::LSTMCell cell{nullptr}; + +public: + // Constructor: input_size instead of grid_size + 
PolicyLSTM(int64_t input_size, int64_t num_atns, int64_t hidden_size = 128) + : input_size_(input_size), hidden_size_(hidden_size), num_atns_(num_atns) { + + encoder = register_module("encoder", torch::nn::Sequential( + torch::nn::Linear(input_size_, hidden_size_), + torch::nn::GELU() + )); + + decoder = register_module("decoder", torch::nn::Linear(hidden_size_, num_atns_)); + + value = register_module("value", torch::nn::Linear(hidden_size_, 1)); + + lstm = register_module("lstm", torch::nn::LSTM(torch::nn::LSTMOptions(hidden_size_, hidden_size_).num_layers(1))); + + cell = register_module("cell", torch::nn::LSTMCell(hidden_size_, hidden_size_)); + + // Share weights between LSTM and LSTMCell + cell->named_parameters()["weight_ih"] = lstm->named_parameters()["weight_ih_l0"]; + cell->named_parameters()["weight_hh"] = lstm->named_parameters()["weight_hh_l0"]; + cell->named_parameters()["bias_ih"] = lstm->named_parameters()["bias_ih_l0"]; + cell->named_parameters()["bias_hh"] = lstm->named_parameters()["bias_hh_l0"]; + + // Initialization + auto encoder_linear = (*encoder)[0]->as(); + torch::nn::init::orthogonal_(encoder_linear->weight, std::sqrt(2.0)); + encoder_linear->bias.data().zero_(); + + torch::nn::init::orthogonal_(decoder->weight, 0.01); + decoder->bias.data().zero_(); + + torch::nn::init::orthogonal_(value->weight, 1.0); + value->bias.data().zero_(); + + torch::nn::init::orthogonal_(lstm->named_parameters()["weight_ih_l0"], 1.0); + torch::nn::init::orthogonal_(lstm->named_parameters()["weight_hh_l0"], 1.0); + lstm->named_parameters()["bias_ih_l0"].data().zero_(); + lstm->named_parameters()["bias_hh_l0"].data().zero_(); + } + + // Forward for evaluation/inference (uses LSTMCell) + std::tuple forward( + torch::Tensor observations, torch::Tensor h, torch::Tensor c) { + int64_t B = observations.size(0); + + // Ensure flat input: [B, input_size] + TORCH_CHECK(observations.dim() == 2 && observations.size(1) == input_size_, + "Observations must be [B, 
input_size]"); + + if (h.defined() && h.numel() > 0) { + TORCH_CHECK(h.dim() == 2 && h.size(0) == B && h.size(1) == hidden_size_, + "h must be [B, hidden_size]"); + TORCH_CHECK(c.dim() == 2 && c.size(0) == B && c.size(1) == hidden_size_, + "c must be [B, hidden_size]"); + } + + auto hidden = encoder->forward(observations.to(torch::kFloat32)); + + std::tuple cell_out; + if (h.defined() && h.numel() > 0) { + cell_out = cell->forward(hidden, std::make_optional(std::make_tuple(h, c))); + } else { + cell_out = cell->forward(hidden); + } + + auto hidden_out = std::get<0>(cell_out); + auto c_out = std::get<1>(cell_out); + + auto logits = decoder->forward(hidden_out); + auto values = value->forward(hidden_out); + + return {logits, values, hidden_out, c_out}; + } + + // Forward for training (uses LSTM) + std::tuple forward_train( + torch::Tensor observations, torch::Tensor lstm_h, torch::Tensor lstm_c) { + auto x = observations; + auto x_shape = x.sizes(); + + // Expecting [B, TT, input_size] or [B, input_size] + TORCH_CHECK((x.dim() == 2 || x.dim() == 3), + "Observations must be [B, input_size] or [B, TT, input_size]"); + TORCH_CHECK(x.size(-1) == input_size_, + "Last dimension of observations must match input_size"); + + int64_t B = x_shape[0]; + int64_t TT = (x.dim() == 3) ? 
x_shape[1] : 1; + + if (lstm_h.defined() && lstm_h.numel() > 0) { + TORCH_CHECK(lstm_h.dim() == 3 && lstm_h.size(0) == 1 && lstm_h.size(1) == B, + "lstm_h must be [1, B, hidden_size]"); + TORCH_CHECK(lstm_c.dim() == 3 && lstm_c.size(0) == 1 && lstm_c.size(1) == B, + "lstm_c must be [1, B, hidden_size]"); + } + + // Flatten time steps if needed + if (x.dim() == 3) { + x = x.reshape({B * TT, input_size_}); + } else { + TT = 1; + } + + auto hidden = encoder->forward(x.to(torch::kFloat32)); + + hidden = hidden.reshape({B, TT, hidden_size_}); + hidden = hidden.transpose(0, 1); // [TT, B, hidden_size] + + std::tuple> lstm_out; + if (lstm_h.defined() && lstm_h.numel() > 0) { + lstm_out = lstm->forward(hidden, std::make_optional(std::make_tuple(lstm_h, lstm_c))); + } else { + lstm_out = lstm->forward(hidden); + } + + hidden = std::get<0>(lstm_out); + hidden = hidden.to(torch::kFloat32); + hidden = hidden.transpose(0, 1); // [B, TT, hidden_size] + + auto flat_hidden = hidden.reshape({-1, hidden_size_}); + auto logits = decoder->forward(flat_hidden); + auto values = value->forward(flat_hidden); + + logits = logits.reshape({B, TT, num_atns_}); + values = values.reshape({B, TT, 1}); + + return {logits, values}; + } +}; + +// Updated compiled_evaluate +std::tuple compiled_evaluate( + torch::Tensor envs_tensor, + torch::Tensor obs, + torch::Tensor actions, + torch::Tensor rewards, + torch::Tensor terminals, + pybind11::object policy_obj, // pybind11::object for Python compatibility + torch::Tensor lstm_h, + torch::Tensor lstm_c, + torch::Tensor obs_buffer, + torch::Tensor act_buffer, + torch::Tensor logprob_buffer, + torch::Tensor rew_buffer, + torch::Tensor term_buffer, + torch::Tensor val_buffer, + int64_t horizon, + int64_t num_envs +) { + auto& policy = policy_obj.cast(); + + // No-grad guard + torch::NoGradGuard no_grad; + + for (int64_t i = 0; i < horizon; ++i) { + // Clamp rewards + auto r = rewards.clamp(-1.0f, 1.0f); + + // Policy forward: Native C++ call + auto 
[logits, value, lstm_h_out, lstm_c_out] = policy.forward(obs, lstm_h, lstm_c); + lstm_h = lstm_h_out; + lstm_c = lstm_c_out; + + // Sample action and logprob (assuming discrete categorical from logits) + auto max_logits = logits.amax(1, true); + auto logits_shifted = logits - max_logits; + auto logsumexp = logits_shifted.exp().sum(1, true).log() + max_logits; + auto logprobs = logits - logsumexp; + auto probs = logprobs.exp(); + auto action = at::multinomial(probs, 1, /*replacement=*/true); + auto logprob = logprobs.gather(1, action).squeeze(1); + action = action.squeeze(1); + + // Store to buffers + obs_buffer.select(1, i).copy_(obs); + act_buffer.select(1, i).copy_(action); + logprob_buffer.select(1, i).copy_(logprob); + rew_buffer.select(1, i).copy_(r); + term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32)); + val_buffer.select(1, i).copy_(value.flatten()); + + // Step the environments + actions.copy_(action); + step_environments_cuda(envs_tensor, num_envs); + } + + return std::make_tuple(lstm_h, lstm_c); +} + +// Updated compiled_train +pybind11::dict compiled_train( + torch::Tensor observations, // [num_envs, horizon, grid_size, grid_size] uint8 + torch::Tensor actions, // [num_envs, horizon] int32 + torch::Tensor logprobs, // [num_envs, horizon] float + torch::Tensor rewards, // [num_envs, horizon] float + torch::Tensor terminals, // [num_envs, horizon] float + torch::Tensor truncations, // [num_envs, horizon] float (included but not used in loop) + torch::Tensor ratio, // [num_envs, horizon] float + torch::Tensor values, // [num_envs, horizon] float + pybind11::object policy_obj, + pybind11::object optimizer, + pybind11::object scheduler, + int64_t total_minibatches, + int64_t minibatch_segments, + int64_t segments, // num_envs + int64_t accumulate_minibatches, + int64_t horizon, + double prio_beta0, + double prio_alpha, + double clip_coef, + double vf_clip_coef, + double gamma, + double gae_lambda, + double vtrace_rho_clip, + double vtrace_c_clip, 
+ double vf_coef, + double ent_coef, + double max_grad_norm, + bool use_rnn, + bool anneal_lr, + int64_t total_epochs, + int64_t current_epoch +) { + auto& policy = policy_obj.cast(); + + // Compute anneal_beta + double anneal_beta = prio_beta0 + (1.0 - prio_beta0) * prio_alpha * static_cast(current_epoch) / total_epochs; + + // Compute advantages + auto advantages = torch::zeros_like(values); + compute_puff_advantage_cuda(values, rewards, terminals, ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip); + + // Prioritize + auto adv = advantages.abs().sum(1); + auto prio_weights = adv.pow(prio_alpha).nan_to_num_(0.0, 0.0, 0.0); + auto sum_weights = prio_weights.sum() + static_cast(adv.size(0)) * 1e-6; + auto prio_probs = (prio_weights + 1e-6) / sum_weights; + + auto device = values.device(); + auto pg_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + auto v_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + auto ent_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + auto total_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + + for (int64_t mb = 0; mb < total_minibatches; ++mb) { + auto idx = at::multinomial(prio_probs, minibatch_segments, /*replacement=*/true); + auto prio_probs_mb = prio_probs.index_select(0, idx).unsqueeze(1); + auto mb_prio = torch::pow(static_cast(segments) * prio_probs_mb, -anneal_beta); + + // Select minibatch tensors + auto mb_obs = observations.index_select(0, idx); + auto mb_actions = actions.index_select(0, idx); + auto mb_logprobs = logprobs.index_select(0, idx); + auto mb_rewards = rewards.index_select(0, idx); + auto mb_terminals = terminals.index_select(0, idx); + auto mb_ratio = ratio.index_select(0, idx); // Not used directly + auto mb_values = values.index_select(0, idx); + auto mb_advantages = advantages.index_select(0, idx); + auto mb_returns = mb_advantages + 
mb_values; + + auto original_obs_shape = mb_obs.sizes(); // [minibatch_segments, horizon, grid_size, grid_size] + if (!use_rnn) { + mb_obs = mb_obs.reshape({-1, original_obs_shape[2], original_obs_shape[3]}); + } + + // Initial LSTM states (undefined for zero init) + torch::Tensor mb_lstm_h; + torch::Tensor mb_lstm_c; + + // Policy forward: Native C++ call + auto [logits, newvalue] = policy.forward_train(mb_obs, mb_lstm_h, mb_lstm_c); + + // Compute newlogprob and entropy (discrete assumption) + auto flat_batch = minibatch_segments * horizon; + auto flat_logits = logits.reshape({flat_batch, -1}); + auto flat_actions = mb_actions.reshape({flat_batch}); + auto max_logits = flat_logits.amax(1, true); + auto logits_shifted = flat_logits - max_logits; + auto logsumexp = (logits_shifted.exp().sum(1, true)).log() + max_logits; + auto logprobs = flat_logits - logsumexp; // Correct + auto probs = logprobs.exp(); + auto newlogprob_flat = logprobs.gather(1, flat_actions.unsqueeze(1)).squeeze(1); + auto newlogprob = newlogprob_flat.reshape({minibatch_segments, horizon}); + auto entropy = - (probs * logprobs).sum(1).mean(); + + auto logratio = newlogprob - mb_logprobs; + auto mb_ratio_new = logratio.exp(); + + // Update full ratio (detach) + ratio.index_copy_(0, idx, mb_ratio_new.detach()); + + // Advantages normalization + auto adv = mb_advantages; + adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8); + + // Policy loss + auto pg_loss1 = -adv * mb_ratio_new; + auto pg_loss2 = -adv * torch::clamp(mb_ratio_new, 1.0 - clip_coef, 1.0 + clip_coef); + auto pg_loss = torch::max(pg_loss1, pg_loss2).mean(); + + // Value loss + newvalue = newvalue.view(mb_returns.sizes()); + auto v_clipped = mb_values + torch::clamp(newvalue - mb_values, -vf_clip_coef, vf_clip_coef); + auto v_loss_unclipped = (newvalue - mb_returns).pow(2); + auto v_loss_clipped = (v_clipped - mb_returns).pow(2); + auto v_loss = 0.5 * torch::max(v_loss_unclipped, v_loss_clipped).mean(); + + // Entropy loss + auto 
entropy_loss = entropy; // Already mean + + // Total loss + auto loss = pg_loss + vf_coef * v_loss - ent_coef * entropy_loss; + pg_sum += pg_loss.detach(); + v_sum += v_loss.detach(); + ent_sum += entropy.detach(); + total_sum += loss.detach(); + + { + pybind11::gil_scoped_release no_gil; + loss.backward(); + } + + // Update values + values.index_copy_(0, idx, newvalue.detach().to(torch::kFloat32)); + + // Accumulate and step + if ((mb + 1) % accumulate_minibatches == 0) { + pybind11::iterable params_iter = policy_obj.attr("parameters")(); + std::vector params_list; + for (pybind11::handle param : params_iter) { + params_list.push_back(param.cast()); + } + torch::nn::utils::clip_grad_norm_(params_list, max_grad_norm); + optimizer.attr("step")(); + optimizer.attr("zero_grad")(); + } + } + + // Scheduler step if anneal_lr + if (anneal_lr) { + scheduler.attr("step")(); + } + + pybind11::dict losses; + auto num_mb = static_cast(total_minibatches); + losses["pg_loss"] = (pg_sum / num_mb).item(); + losses["v_loss"] = (v_sum / num_mb).item(); + losses["entropy"] = (ent_sum / num_mb).item(); + losses["total_loss"] = (total_sum / num_mb).item(); + return losses; +} + + +// PYBIND11_MODULE with the extension name (pufferlib._C) +PYBIND11_MODULE(_C, m) { + m.def("create_squared_environments", &create_squared_environments); + m.def("step_environments", &step_environments_cuda); + m.def("reset_environments", &reset_environments_cuda); + m.def("compiled_evaluate", &compiled_evaluate); + m.def("compiled_train", &compiled_train); + + py::class_, torch::nn::Module> cls(m, "PolicyLSTM"); + cls.def(py::init()); + cls.def("forward", &pufferlib::PolicyLSTM::forward); + cls.def("forward_train", &pufferlib::PolicyLSTM::forward_train); +} } diff --git a/pufferlib/models.py b/pufferlib/models.py index d13d0d5fc..d9f49fd01 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -117,12 +117,12 @@ def __init__(self, env, hidden_size=128): self.is_continuous = 
isinstance(env.single_action_space, pufferlib.spaces.Box) - num_obs = np.prod(env.single_observation_space.shape) + num_obs = int(np.prod(env.single_observation_space.shape)) self.encoder = torch.nn.Sequential( pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), nn.GELU(), ) - num_atns = env.single_action_space.n + num_atns = int(env.single_action_space.n) self.decoder = pufferlib.pytorch.layer_init( nn.Linear(hidden_size, num_atns), std=0.01) self.value = pufferlib.pytorch.layer_init( @@ -150,6 +150,54 @@ def decode_actions(self, hidden): values = self.value(hidden) return logits, values +def forward_train(policy, lstm, observations, state): + #Forward function for training. Uses LSTM for fast time-batching + x = observations + lstm_h = state['lstm_h'] + lstm_c = state['lstm_c'] + + x_shape, space_shape = x.shape, policy.obs_shape + x_n, space_n = len(x_shape), len(space_shape) + if x_shape[-space_n:] != space_shape: + raise ValueError('Invalid input tensor shape', x.shape) + + if x_n == space_n + 1: + B, TT = x_shape[0], 1 + elif x_n == space_n + 2: + B, TT = x_shape[:2] + else: + raise ValueError('Invalid input tensor shape', x.shape) + + if lstm_h is not None: + assert lstm_h.shape[1] == lstm_c.shape[1] == B, 'LSTM state must be (h, c)' + lstm_state = (lstm_h, lstm_c) + else: + lstm_state = None + + x = x.reshape(B*TT, *space_shape) + hidden = policy.policy.encode_observations(x, state) + assert hidden.shape == (B*TT, policy.input_size) + + hidden = hidden.reshape(B, TT, policy.input_size) + + hidden = hidden.transpose(0, 1) + #hidden = self.pre_layernorm(hidden) + hidden, (lstm_h, lstm_c) = lstm(hidden, lstm_state) + hidden = hidden.float() + + #hidden = self.post_layernorm(hidden) + hidden = hidden.transpose(0, 1) + + flat_hidden = hidden.reshape(B*TT, policy.hidden_size) + logits, values = policy.policy.decode_actions(flat_hidden) + values = values.reshape(B, TT) + #state.batch_logits = logits.reshape(B, TT, -1) + state['hidden'] = hidden + 
state['lstm_h'] = lstm_h.detach() + state['lstm_c'] = lstm_c.detach() + return logits, values + + class LSTMWrapper(nn.Module): def __init__(self, env, policy, hidden_size=128): @@ -166,7 +214,6 @@ def __init__(self, env, policy, hidden_size=128): self.hidden_size = hidden_size self.is_continuous = self.policy.is_continuous - ''' for name, param in self.named_parameters(): if 'layer_norm' in name: continue @@ -174,15 +221,14 @@ def __init__(self, env, policy, hidden_size=128): nn.init.constant_(param, 0) elif "weight" in name and param.ndim >= 2: nn.init.orthogonal_(param, 1.0) - ''' - #self.lstm = nn.LSTM(input_size, hidden_size) + self.lstm = nn.LSTM(input_size, hidden_size) self.cell = torch.nn.LSTMCell(input_size, hidden_size) - #self.cell.weight_ih = self.lstm.weight_ih_l0 - #self.cell.weight_hh = self.lstm.weight_hh_l0 - #self.cell.bias_ih = self.lstm.bias_ih_l0 - #self.cell.bias_hh = self.lstm.bias_hh_l0 + self.cell.weight_ih = self.lstm.weight_ih_l0 + self.cell.weight_hh = self.lstm.weight_hh_l0 + self.cell.bias_ih = self.lstm.bias_ih_l0 + self.cell.bias_hh = self.lstm.bias_hh_l0 #self.pre_layernorm = nn.LayerNorm(hidden_size) #self.post_layernorm = nn.LayerNorm(hidden_size) @@ -198,15 +244,12 @@ def forward(self, observations, h, c): else: lstm_state = None - #hidden = self.pre_layernorm(hidden) hidden, c = self.cell(hidden, lstm_state) - #hidden = self.post_layernorm(hidden) logits, values = self.policy.decode_actions(hidden) return logits, values, hidden, c - ''' - def forward(self, observations, state): - #Forward function for training. Uses LSTM for fast time-batching + def forward_train(self, observations, state): + '''Forward function for training. 
Uses LSTM for fast time-batching''' x = observations lstm_h = state['lstm_h'] lstm_c = state['lstm_c'] @@ -251,7 +294,6 @@ def forward(self, observations, state): state['lstm_h'] = lstm_h.detach() state['lstm_c'] = lstm_c.detach() return logits, values - ''' ''' class LSTMWrapper(nn.Module): diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 1a4533767..2d9b1c1a1 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -7,6 +7,7 @@ warnings.filterwarnings('error', category=RuntimeWarning) import os +import io import sys import glob import ast @@ -29,6 +30,7 @@ import torch.utils.cpp_extension import pufferlib +from pufferlib.models import forward_train import pufferlib.sweep import pufferlib.vector import pufferlib.pytorch @@ -37,6 +39,26 @@ except ImportError: raise ImportError('Failed to import C/CUDA advantage kernel. If you have non-default PyTorch, try installing with --no-build-isolation') +#@torch.library.impl_abstract("squared::step_environments") +#def meta_step_environments(envs): +# pass + +class SquaredEnv: + def __init__(self, num_envs, grid_size): + dummy = torch.zeros(5).cuda() + self.env, self.observations, self.actions, self.rewards, self.terminals = _C.create_squared_environments(num_envs, grid_size, dummy) + self.observations = self.observations.view(num_envs, -1) + self.indices = torch.arange(num_envs).cuda().int() + + def reset(self): + _C.reset_environments(self.env, self.indices) + return self.observations + + def step(self, actions): + self.actions[:] = actions + _C.step_environments(self.env) + return self.observations, self.rewards, self.terminals + import rich import rich.traceback from rich.table import Table @@ -55,7 +77,9 @@ #torch._dynamo.config.capture_scalar_outputs = True class PuffeRL: - def __init__(self, config, vecenv, policy, logger=None, verbose=True): + #def __init__(self, config, vecenv, policy, logger=None, verbose=True): + #def __init__(self, config, policy, logger=None, verbose=True): + def 
__init__(self, config, logger=None, verbose=True): # Backend perf optimization torch.set_float32_matmul_precision('high') torch.backends.cudnn.deterministic = config['torch_deterministic'] @@ -67,12 +91,30 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): #np.random.seed(seed) #torch.manual_seed(seed) + num_envs = 4096 + self.num_envs = num_envs + grid_size = 11 + dummy = torch.zeros(5).cuda() + vecenv = SquaredEnv(num_envs, grid_size) + vecenv.reset() + + from gymnasium.spaces import Box, Discrete + obs_space = Box(low=-1, high=1, shape=(grid_size*grid_size,), dtype=np.float32) + atn_space = Discrete(5) + self.single_observation_space = obs_space + self.single_action_space = atn_space + + policy = _C.PolicyLSTM(grid_size*grid_size, 5, 128) + policy.cuda() + # Vecenv info - vecenv.async_reset(seed) - obs_space = vecenv.single_observation_space - atn_space = vecenv.single_action_space - total_agents = vecenv.num_agents + #vecenv.async_reset(seed) + #obs_space = vecenv.single_observation_space + #atn_space = vecenv.single_action_space + #total_agents = vecenv.num_agents + total_agents = num_envs self.total_agents = total_agents + self.agents_per_batch = total_agents # Experience if config['batch_size'] == 'auto' and config['bptt_horizon'] == 'auto': @@ -111,8 +153,9 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): # LSTM if config['use_rnn']: - n = vecenv.agents_per_batch - h = policy.hidden_size + n = self.agents_per_batch + #h = policy.hidden_size + h = 128 self.lstm_h = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} self.lstm_c = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} @@ -121,6 +164,7 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): max_minibatch_size = config['max_minibatch_size'] self.minibatch_size = min(minibatch_size, max_minibatch_size) self.accumulate_minibatches = max(1, minibatch_size // max_minibatch_size) + 
self.total_minibatches = config['num_minibatches'] self.minibatch_segments = self.minibatch_size // horizon if self.minibatch_segments * horizon != self.minibatch_size: raise pufferlib.APIUsageError( @@ -130,6 +174,7 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): # Torch compile self.uncompiled_policy = policy self.policy = policy + #self.compiled_policy = torch.compile(policy) if config['compile']: self.policy = torch.compile(policy, mode=config['compile_mode']) #self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) @@ -172,10 +217,10 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): # Automatic mixed precision precision = config['precision'] self.amp_context = contextlib.nullcontext() - if config.get('amp', True) and config['device'] == 'cuda': - self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision)) - if precision not in ('float32', 'bfloat16'): - raise pufferlib.APIUsageError(f'Invalid precision: {precision}: use float32 or bfloat16') + #if config.get('amp', True) and config['device'] == 'cuda': + # self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision)) + #if precision not in ('float32', 'bfloat16'): + # raise pufferlib.APIUsageError(f'Invalid precision: {precision}: use float32 or bfloat16') # Initializations self.config = config @@ -207,156 +252,139 @@ def sps(self): return (self.global_step - self.last_log_step) / (time.time() - self.last_log_time) + def _compiled_evaluate(self, lstm_h, lstm_c): + config = self.config + device = config['device'] + o = self.vecenv.observations + r = self.vecenv.rewards + d = self.vecenv.terminals + t = torch.zeros(o.shape[0], dtype=torch.bool, device=device) + for i in range(config['bptt_horizon']): + done_mask = d + self.global_step += self.num_envs + + with torch.no_grad(), self.amp_context: + logits, value, lstm_h, lstm_c = self.policy(o, lstm_h, lstm_c) + action, logprob, _ = 
pufferlib.pytorch.sample_logits(logits) + r = torch.clamp(r, -1, 1) + + with torch.no_grad(): + self.observations[:, i] = o + self.actions[:, i] = action + self.logprobs[:, i] = logprob + self.rewards[:, i] = r + self.terminals[:, i] = d.float() + self.values[:, i] = value.flatten() + + if isinstance(logits, torch.distributions.Normal): + action = torch.clamp(action, self.vecenv.action_space.low, self.vecenv.action_space.high) + + self.vecenv.step(action) + + def evaluate(self): profile = self.profile epoch = self.epoch profile('eval', epoch) - profile('eval_misc', epoch, nest=True) config = self.config device = config['device'] if config['use_rnn']: - for k in self.lstm_h: - self.lstm_h[k] = torch.zeros(self.lstm_h[k].shape, device=device) - self.lstm_c[k] = torch.zeros(self.lstm_c[k].shape, device=device) - - self.full_rows = 0 - while self.full_rows < self.segments: - profile('env', epoch) - o, r, d, t, info, env_id, mask = self.vecenv.recv() - - profile('eval_misc', epoch) - env_id = slice(env_id[0], env_id[-1] + 1) - - done_mask = d + t # TODO: Handle truncations separately - self.global_step += int(mask.sum()) - - profile('eval_copy', epoch) - o = torch.as_tensor(o) - o_device = o.to(device)#, non_blocking=True) - r = torch.as_tensor(r).to(device)#, non_blocking=True) - d = torch.as_tensor(d).to(device)#, non_blocking=True) - - profile('eval_forward', epoch) - with torch.no_grad(), self.amp_context: - state = dict( - reward=r, - done=d, - env_id=env_id, - mask=mask, - ) - - if config['use_rnn']: - state['lstm_h'] = self.lstm_h[env_id.start] - state['lstm_c'] = self.lstm_c[env_id.start] - - #h, c = state['lstm_h'], state['lstm_c'] - logits, value = self.policy(o_device) - #logits, value, h, c = self.policy(o_device, h, c) - #state['lstm_h'] = h - #state['lstm_c'] = c - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - r = torch.clamp(r, -1, 1) + n = self.agents_per_batch + h = 128 + #h = self.policy.hidden_size + + lstm_h = torch.zeros((n, h), 
device=device) + lstm_c = torch.zeros((n, h), device=device) + + lstm_h, lstm_c = _C.compiled_evaluate( + self.vecenv.env, + self.vecenv.observations, + self.vecenv.actions, + self.vecenv.rewards, + self.vecenv.terminals, + self.policy, + lstm_h, + lstm_c, + self.observations, + self.actions, + self.logprobs, + self.rewards, + self.terminals, + self.values, + self.config['bptt_horizon'], + self.num_envs # Or wherever num_envs is stored + ) - profile('eval_copy', epoch) - with torch.no_grad(): - if config['use_rnn']: - self.lstm_h[env_id.start] = state['lstm_h'] - self.lstm_c[env_id.start] = state['lstm_c'] - - # Fast path for fully vectorized envs - l = self.ep_lengths[env_id.start].item() - batch_rows = slice(self.ep_indices[env_id.start].item(), 1+self.ep_indices[env_id.stop - 1].item()) - - if config['cpu_offload']: - self.observations[batch_rows, l] = o - else: - self.observations[batch_rows, l] = o_device - - self.actions[batch_rows, l] = action - self.logprobs[batch_rows, l] = logprob - self.rewards[batch_rows, l] = r - self.terminals[batch_rows, l] = d.float() - self.values[batch_rows, l] = value.flatten() - - # Note: We are not yet handling masks in this version - self.ep_lengths[env_id] += 1 - if l+1 >= config['bptt_horizon']: - num_full = env_id.stop - env_id.start - self.ep_indices[env_id] = self.free_idx + torch.arange(num_full, device=config['device']).int() - self.ep_lengths[env_id] = 0 - self.free_idx += num_full - self.full_rows += num_full - - action = action.cpu().numpy() - if isinstance(logits, torch.distributions.Normal): - action = np.clip(action, self.vecenv.action_space.low, self.vecenv.action_space.high) - - profile('eval_misc', epoch) - for i in info: - for k, v in pufferlib.unroll_nested_dict(i): - if isinstance(v, np.ndarray): - v = v.tolist() - elif isinstance(v, (list, tuple)): - self.stats[k].extend(v) - else: - self.stats[k].append(v) - - profile('env', epoch) - self.vecenv.send(action) - - profile('eval_misc', epoch) - self.free_idx 
= self.total_agents - self.ep_indices = torch.arange(self.total_agents, device=device, dtype=torch.int32) - self.ep_lengths.zero_() + self.global_step += config['batch_size'] profile.end() return self.stats - def train(self): - epoch = self.epoch profile = self.profile + epoch = self.epoch profile('train', epoch) - profile('train_learn', epoch, nest=True) - - _compiled_train(self.policy, self.optimizer, self.observations, self.actions, self.logprobs, self.rewards, - self.terminals, self.truncations, self.ratio, self.values, self.epoch, self.total_epochs, - self.minibatch_segments, self.segments, self.vecenv.single_observation_space.shape, - int(self.vecenv.single_action_space.n), self.config) + config = self.config + device = config['device'] + + losses = _C.compiled_train( + self.observations, + self.actions, + self.logprobs, + self.rewards, + self.terminals, + self.truncations, + self.ratio, + self.values, + self.policy, + self.optimizer, + self.scheduler if self.config['anneal_lr'] else None, + self.total_minibatches, + self.minibatch_segments, + self.segments, # Assuming self.segments = self.num_envs + self.accumulate_minibatches, + self.config['bptt_horizon'], + self.config['prio_beta0'], + self.config['prio_alpha'], + self.config['clip_coef'], + self.config['vf_clip_coef'], + self.config['gamma'], + self.config['gae_lambda'], + self.config['vtrace_rho_clip'], + self.config['vtrace_c_clip'], + self.config['vf_coef'], + self.config['ent_coef'], + self.config['max_grad_norm'], + self.config['use_rnn'], + self.config['anneal_lr'], + self.total_epochs, + self.epoch + ) - profile('train_misc', epoch) # Reprioritize experience - ''' profile('train_misc', epoch) if config['anneal_lr']: self.scheduler.step() - y_pred = self.values.flatten() - y_true = advantages.flatten() + self.values.flatten() - var_y = y_true.var() - explained_var = torch.nan if var_y == 0 else (1 - (y_true - y_pred).var() / var_y).item() - losses['explained_variance'] = explained_var - ''' + 
#y_pred = self.values.flatten() + #y_true = advantages.flatten() + self.values.flatten() + #var_y = y_true.var() + #explained_var = torch.nan if var_y == 0 else (1 - (y_true - y_pred).var() / var_y).item() + #losses['explained_variance'] = explained_var profile.end() - - config = self.config - - logs = None self.epoch += 1 done_training = self.global_step >= config['total_timesteps'] if done_training or self.global_step == 0 or time.time() > self.last_log_time + 0.25: logs = self.mean_and_log() - #self.losses = losses + self.losses = losses self.print_dashboard() self.stats = defaultdict(list) self.last_log_time = time.time() self.last_log_step = self.global_step profile.clear() - if self.epoch == 1: - profile.reset() if self.epoch % config['checkpoint_interval'] == 0 or done_training: self.save_checkpoint() @@ -427,7 +455,7 @@ def save_checkpoint(self): if os.path.exists(model_path): return model_path - torch.save(self.uncompiled_policy.state_dict(), model_path) + #torch.save(self.uncompiled_policy.state_dict(), model_path) state = { 'optimizer_state_dict': self.optimizer.state_dict(), @@ -614,7 +642,7 @@ def dist_mean(value, device): return dist_sum(value, device) / torch.distributed.get_world_size() class Profile: - def __init__(self, frequency=5): + def __init__(self, frequency=1): self.reset() self.frequency = frequency self.stack = [] @@ -662,30 +690,53 @@ def clear(self): prof['buffer'] = prof['delta'] prof['delta'] = 0 -def compute_loss(params_and_buffers, policy, mb_obs, +@torch.compile(fullgraph=True, mode='reduce-overhead') +def lstm_loop(policy, params_and_buffers, mb_obs, h, c, logits, newvalue, horizon): + for t in range(horizon): + l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], h, c)) + logits[:, t] = l + newvalue[:, t] = n + + return logits, newvalue + +def compute_loss(params_and_buffers, policy, lstm, mb_obs, mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, mb_ratio, mb_values, mb_returns, 
mb_advantages, mb_prio, adv, epoch, idx, config, clip_coef, vf_clip, num_minibatches, mb, horizon, num_atns, segments): segments = int(segments) num_atns = int(num_atns) - #logits = torch.empty(segments, horizon, num_atns, device=config['device']) - #newvalue = torch.empty(segments, horizon, 1, device=config['device']) - logits = [] - newvalue = [] + logits = torch.empty(segments, horizon, num_atns, device=config['device']) + newvalue = torch.empty(segments, horizon, 1, device=config['device']) + #logits = [] + #newvalue = [] #h = None #c = None + + ''' h = torch.zeros(segments, 128, device=config['device']) c = torch.zeros(segments, 128, device=config['device']) + logits, values = lstm_loop(policy, params_and_buffers, mb_obs, h, c, logits, newvalue, horizon) + ''' + + state = { + 'lstm_h': None, + 'lstm_c': None + } + logits, values = forward_train(policy, lstm, mb_obs, state) + #for t in range(horizon): - # #l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], h, c)) - # l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], None, None)) - # #logits[:, t] = l - # #newvalue[:, t] = n - # logits.append(l) - # newvalue.append(n) + # l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], h, c)) + # #l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], None, None)) + # logits[:, t] = l + # newvalue[:, t] = n + # #logits.append(l) + # #newvalue.append(n) - logits, newvalue = func.functional_call(policy, params_and_buffers, mb_obs) + logits = logits.reshape(segments*horizon, num_atns) + + #logits, newvalue, h, c = policy(mb_obs, h, c) + #logits, newvalue = func.functional_call(policy, params_and_buffers, mb_obs) #logits = torch.cat(logits, dim=0) #newvalue = torch.stack(newvalue, dim=1) @@ -724,8 +775,8 @@ def compute_loss(params_and_buffers, policy, mb_obs, loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss return loss -@torch.compile(fullgraph=True) 
-def _compiled_train(policy, optimizer, observations, actions, logprobs, rewards, +#@torch.compile(fullgraph=True) +def _compiled_train(policy, lstm, optimizer, observations, actions, logprobs, rewards, terminals, truncations, ratio, values, epoch, total_epochs, minibatch_segments, segments, obs_shape, atn_shape, config): @@ -779,8 +830,17 @@ def _compiled_train(policy, optimizer, observations, actions, logprobs, rewards, adv = advantages[idx] + optimizer.zero_grad() + loss = compute_loss(params_and_buffers, policy, lstm, mb_obs, + mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, + mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, + config, clip_coef, vf_clip, num_minibatches, mb, horizon, atn_shape, minibatch_segments) + loss.backward() + optimizer.step() + + ''' grad_fn = func.grad(compute_loss, has_aux=False) - grads = grad_fn(params_and_buffers, policy, mb_obs, + grads = grad_fn(params_and_buffers, policy, lstm, mb_obs, mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, config, clip_coef, vf_clip, num_minibatches, mb, horizon, atn_shape, minibatch_segments) @@ -788,6 +848,7 @@ def _compiled_train(policy, optimizer, observations, actions, logprobs, rewards, for name, param in zip(param_names, params): if name in grads: param.data.sub_(lr * grads[name]) + ''' class Utilization(Thread): def __init__(self, delay=1, maxlen=20): @@ -974,7 +1035,8 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=Tr logger = WandbLogger(args) train_config = dict(**args['train'], env=env_name) - pufferl = PuffeRL(train_config, vecenv, policy, logger, verbose) + #pufferl = PuffeRL(train_config, vecenv, policy, logger, verbose) + pufferl = PuffeRL(train_config, logger, verbose) pufferl.logger.init(args) all_logs = [] diff --git a/setup.py b/setup.py index b7c63dae1..f1004bdbf 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 
@@ import tarfile import platform import shutil +import pybind11 from setuptools.command.build_ext import build_ext from torch.utils import cpp_extension @@ -76,7 +77,7 @@ def download_box2d(platform): cxx_args = [ '-fdiagnostics-color=always', ] -nvcc_args = [] +nvcc_args = ['-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=1'] if DEBUG: extra_compile_args += [ @@ -227,6 +228,7 @@ def run(self): extension( "pufferlib._C", torch_sources, + include_dirs=[pybind11.get_include()], extra_compile_args = { "cxx": cxx_args, "nvcc": nvcc_args, From 81de3c55a51d641e170141e4e4c37ac256fe8c4e Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 24 Oct 2025 19:41:29 +0000 Subject: [PATCH 076/188] 4.6M sps test --- pufferlib/extensions/pufferlib.cpp | 40 ++++++++++++++++++++++++------ pufferlib/pufferl.py | 21 +++++++++++----- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 0b4590f0f..9ea6a7847 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -248,6 +248,24 @@ class PolicyLSTM : public torch::nn::Module { } }; +struct OptimizerWrapper { + torch::optim::Adam optimizer; + + // Constructor + explicit OptimizerWrapper(torch::nn::Module& module, double lr, double beta1, double beta2, double eps) + : optimizer(module.parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)) + { + } + // Optional: expose step, zero_grad, etc., if needed from Python + void step() { + optimizer.step(); + } + + void zero_grad() { + optimizer.zero_grad(); + } +}; + // Updated compiled_evaluate std::tuple compiled_evaluate( torch::Tensor envs_tensor, @@ -318,8 +336,8 @@ pybind11::dict compiled_train( torch::Tensor ratio, // [num_envs, horizon] float torch::Tensor values, // [num_envs, horizon] float pybind11::object policy_obj, - pybind11::object optimizer, - pybind11::object scheduler, + pybind11::object optimizer_wrapper_obj, + //pybind11::object scheduler, 
int64_t total_minibatches, int64_t minibatch_segments, int64_t segments, // num_envs @@ -342,6 +360,8 @@ pybind11::dict compiled_train( int64_t current_epoch ) { auto& policy = policy_obj.cast(); + auto& optimized_wrapper = optimizer_wrapper_obj.cast(); + auto& optimizer = optimized_wrapper.optimizer; // Compute anneal_beta double anneal_beta = prio_beta0 + (1.0 - prio_beta0) * prio_alpha * static_cast(current_epoch) / total_epochs; @@ -451,15 +471,15 @@ pybind11::dict compiled_train( params_list.push_back(param.cast()); } torch::nn::utils::clip_grad_norm_(params_list, max_grad_norm); - optimizer.attr("step")(); - optimizer.attr("zero_grad")(); + optimizer.step(); + optimizer.zero_grad(); } } // Scheduler step if anneal_lr - if (anneal_lr) { - scheduler.attr("step")(); - } + //if (anneal_lr) { + // scheduler.attr("step")(); + //} pybind11::dict losses; auto num_mb = static_cast(total_minibatches); @@ -479,6 +499,12 @@ PYBIND11_MODULE(_C, m) { m.def("compiled_evaluate", &compiled_evaluate); m.def("compiled_train", &compiled_train); + py::class_(m, "OptimizerWrapper") + .def(py::init(), + py::arg("module"), py::arg("lr"), py::arg("beta1"), py::arg("beta2"), py::arg("eps")) + .def("step", &OptimizerWrapper::step) + .def("zero_grad", &OptimizerWrapper::zero_grad); + py::class_, torch::nn::Module> cls(m, "PolicyLSTM"); cls.def(py::init()); cls.def("forward", &pufferlib::PolicyLSTM::forward); diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 2d9b1c1a1..9ef0b5a76 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -181,6 +181,7 @@ def __init__(self, config, logger=None, verbose=True): pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) # Optimizer + ''' if config['optimizer'] == 'adam': optimizer = torch.optim.Adam( self.policy.parameters(), @@ -201,6 +202,14 @@ def __init__(self, config, logger=None, verbose=True): ) else: raise ValueError(f'Unknown optimizer: {config["optimizer"]}') + ''' 
+ optimizer = _C.OptimizerWrapper( + self.policy, + config['learning_rate'], + config['adam_beta1'], + config['adam_beta2'], + config['adam_eps'], + ) self.optimizer = optimizer @@ -211,7 +220,7 @@ def __init__(self, config, logger=None, verbose=True): # Learning rate scheduler epochs = max(1, config['total_timesteps'] // config['batch_size']) - self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) + #self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) self.total_epochs = epochs # Automatic mixed precision @@ -339,7 +348,7 @@ def train(self): self.values, self.policy, self.optimizer, - self.scheduler if self.config['anneal_lr'] else None, + #self.scheduler if self.config['anneal_lr'] else None, self.total_minibatches, self.minibatch_segments, self.segments, # Assuming self.segments = self.num_envs @@ -364,8 +373,8 @@ def train(self): # Reprioritize experience profile('train_misc', epoch) - if config['anneal_lr']: - self.scheduler.step() + #if config['anneal_lr']: + # self.scheduler.step() #y_pred = self.values.flatten() #y_true = advantages.flatten() + self.values.flatten() @@ -410,7 +419,7 @@ def mean_and_log(self): 'agent_steps': agent_steps, 'uptime': time.time() - self.start_time, 'epoch': int(dist_sum(self.epoch, device)), - 'learning_rate': self.optimizer.param_groups[0]["lr"], + #'learning_rate': self.optimizer.param_groups[0]["lr"], **{f'environment/{k}': v for k, v in self.stats.items()}, **{f'losses/{k}': v for k, v in self.losses.items()}, **{f'performance/{k}': v['elapsed'] for k, v in self.profile}, @@ -458,7 +467,7 @@ def save_checkpoint(self): #torch.save(self.uncompiled_policy.state_dict(), model_path) state = { - 'optimizer_state_dict': self.optimizer.state_dict(), + #'optimizer_state_dict': self.optimizer.state_dict(), 'global_step': self.global_step, 'agent_step': self.global_step, 'update': self.epoch, From 4d3cdbdcecbba57d4acf10aa1c9ffb2f442c34aa Mon Sep 17 00:00:00 2001 From: 
Joseph Suarez Date: Fri, 24 Oct 2025 20:36:36 +0000 Subject: [PATCH 077/188] Un-cppify a bit --- pufferlib/extensions/pufferlib.cpp | 71 ++++++++++++++---------------- pufferlib/pufferl.py | 34 +++++++------- 2 files changed, 49 insertions(+), 56 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 9ea6a7847..3accd9edf 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -248,32 +248,30 @@ class PolicyLSTM : public torch::nn::Module { } }; -struct OptimizerWrapper { - torch::optim::Adam optimizer; - - // Constructor - explicit OptimizerWrapper(torch::nn::Module& module, double lr, double beta1, double beta2, double eps) - : optimizer(module.parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)) - { - } - // Optional: expose step, zero_grad, etc., if needed from Python - void step() { - optimizer.step(); - } - - void zero_grad() { - optimizer.zero_grad(); - } -}; +typedef struct { + PolicyLSTM* policy; + torch::optim::Adam* optimizer; +} PuffeRL; + +std::unique_ptr create_pufferl(int64_t input_size, + int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps) { + auto policy = new PolicyLSTM(input_size, num_atns, hidden_size); + auto optimizer = new torch::optim::Adam(policy->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); + + auto pufferl = std::make_unique(); + pufferl->policy = policy; + pufferl->optimizer = optimizer; + return pufferl; +} // Updated compiled_evaluate std::tuple compiled_evaluate( + pybind11::object pufferl_obj, torch::Tensor envs_tensor, torch::Tensor obs, torch::Tensor actions, torch::Tensor rewards, torch::Tensor terminals, - pybind11::object policy_obj, // pybind11::object for Python compatibility torch::Tensor lstm_h, torch::Tensor lstm_c, torch::Tensor obs_buffer, @@ -285,7 +283,9 @@ std::tuple compiled_evaluate( int64_t horizon, int64_t num_envs ) { - auto& policy = 
policy_obj.cast(); + auto& pufferl = pufferl_obj.cast(); + auto& policy = pufferl.policy; + auto& optimizer = pufferl.optimizer; // No-grad guard torch::NoGradGuard no_grad; @@ -295,7 +295,7 @@ std::tuple compiled_evaluate( auto r = rewards.clamp(-1.0f, 1.0f); // Policy forward: Native C++ call - auto [logits, value, lstm_h_out, lstm_c_out] = policy.forward(obs, lstm_h, lstm_c); + auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs, lstm_h, lstm_c); lstm_h = lstm_h_out; lstm_c = lstm_c_out; @@ -327,6 +327,7 @@ std::tuple compiled_evaluate( // Updated compiled_train pybind11::dict compiled_train( + pybind11::object pufferl_obj, torch::Tensor observations, // [num_envs, horizon, grid_size, grid_size] uint8 torch::Tensor actions, // [num_envs, horizon] int32 torch::Tensor logprobs, // [num_envs, horizon] float @@ -335,8 +336,6 @@ pybind11::dict compiled_train( torch::Tensor truncations, // [num_envs, horizon] float (included but not used in loop) torch::Tensor ratio, // [num_envs, horizon] float torch::Tensor values, // [num_envs, horizon] float - pybind11::object policy_obj, - pybind11::object optimizer_wrapper_obj, //pybind11::object scheduler, int64_t total_minibatches, int64_t minibatch_segments, @@ -359,9 +358,9 @@ pybind11::dict compiled_train( int64_t total_epochs, int64_t current_epoch ) { - auto& policy = policy_obj.cast(); - auto& optimized_wrapper = optimizer_wrapper_obj.cast(); - auto& optimizer = optimized_wrapper.optimizer; + auto& pufferl = pufferl_obj.cast(); + auto& policy = pufferl.policy; + auto& optimizer = pufferl.optimizer; // Compute anneal_beta double anneal_beta = prio_beta0 + (1.0 - prio_beta0) * prio_alpha * static_cast(current_epoch) / total_epochs; @@ -408,7 +407,7 @@ pybind11::dict compiled_train( torch::Tensor mb_lstm_c; // Policy forward: Native C++ call - auto [logits, newvalue] = policy.forward_train(mb_obs, mb_lstm_h, mb_lstm_c); + auto [logits, newvalue] = policy->forward_train(mb_obs, mb_lstm_h, mb_lstm_c); // 
Compute newlogprob and entropy (discrete assumption) auto flat_batch = minibatch_segments * horizon; @@ -465,14 +464,9 @@ pybind11::dict compiled_train( // Accumulate and step if ((mb + 1) % accumulate_minibatches == 0) { - pybind11::iterable params_iter = policy_obj.attr("parameters")(); - std::vector params_list; - for (pybind11::handle param : params_iter) { - params_list.push_back(param.cast()); - } - torch::nn::utils::clip_grad_norm_(params_list, max_grad_norm); - optimizer.step(); - optimizer.zero_grad(); + torch::nn::utils::clip_grad_norm_(policy->parameters(), max_grad_norm); + optimizer->step(); + optimizer->zero_grad(); } } @@ -499,11 +493,10 @@ PYBIND11_MODULE(_C, m) { m.def("compiled_evaluate", &compiled_evaluate); m.def("compiled_train", &compiled_train); - py::class_(m, "OptimizerWrapper") - .def(py::init(), - py::arg("module"), py::arg("lr"), py::arg("beta1"), py::arg("beta2"), py::arg("eps")) - .def("step", &OptimizerWrapper::step) - .def("zero_grad", &OptimizerWrapper::zero_grad); + m.def("create_pufferl", &create_pufferl); + py::class_>(m, "PuffeRL") + .def_readwrite("policy", &pufferlib::PuffeRL::policy) + .def_readwrite("optimizer", &pufferlib::PuffeRL::optimizer); py::class_, torch::nn::Module> cls(m, "PolicyLSTM"); cls.def(py::init()); diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 9ef0b5a76..2ac9dab20 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -104,8 +104,8 @@ def __init__(self, config, logger=None, verbose=True): self.single_observation_space = obs_space self.single_action_space = atn_space - policy = _C.PolicyLSTM(grid_size*grid_size, 5, 128) - policy.cuda() + #policy = _C.PolicyLSTM(grid_size*grid_size, 5, 128) + #policy.cuda() # Vecenv info #vecenv.async_reset(seed) @@ -172,13 +172,13 @@ def __init__(self, config, logger=None, verbose=True): ) # Torch compile - self.uncompiled_policy = policy - self.policy = policy + #self.uncompiled_policy = policy + #self.policy = policy #self.compiled_policy = 
torch.compile(policy) - if config['compile']: - self.policy = torch.compile(policy, mode=config['compile_mode']) - #self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) - pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) + #if config['compile']: + # self.policy = torch.compile(policy, mode=config['compile_mode']) + # #self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) + # pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) # Optimizer ''' @@ -203,15 +203,16 @@ def __init__(self, config, logger=None, verbose=True): else: raise ValueError(f'Unknown optimizer: {config["optimizer"]}') ''' - optimizer = _C.OptimizerWrapper( - self.policy, + self.pufferl_cpp = _C.create_pufferl( + grid_size*grid_size, + 5, + 128, config['learning_rate'], config['adam_beta1'], config['adam_beta2'], config['adam_eps'], ) - - self.optimizer = optimizer + self.pufferl_cpp.policy.cuda() # Logging self.logger = logger @@ -247,7 +248,7 @@ def __init__(self, config, logger=None, verbose=True): self.verbose = verbose # Dashboard - self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) + #self.model_size = sum(p.numel() for p in policy.parameters() if p.requires_grad) self.print_dashboard(clear=True) @property @@ -308,12 +309,12 @@ def evaluate(self): lstm_c = torch.zeros((n, h), device=device) lstm_h, lstm_c = _C.compiled_evaluate( + self.pufferl_cpp, self.vecenv.env, self.vecenv.observations, self.vecenv.actions, self.vecenv.rewards, self.vecenv.terminals, - self.policy, lstm_h, lstm_c, self.observations, @@ -338,6 +339,7 @@ def train(self): device = config['device'] losses = _C.compiled_train( + self.pufferl_cpp, self.observations, self.actions, self.logprobs, @@ -346,8 +348,6 @@ def train(self): self.truncations, self.ratio, self.values, - self.policy, - self.optimizer, #self.scheduler if 
self.config['anneal_lr'] else None, self.total_minibatches, self.minibatch_segments, @@ -521,7 +521,7 @@ def print_dashboard(self, clear=False, idx=[0], s.add_column(f"{c1}Summary", justify='left', vertical='top', width=10) s.add_column(f"{c1}Value", justify='right', vertical='top', width=14) s.add_row(f'{c2}Env', f'{b2}{config["env"]}') - s.add_row(f'{c2}Params', abbreviate(self.model_size, b2, c2)) + #s.add_row(f'{c2}Params', abbreviate(self.model_size, b2, c2)) s.add_row(f'{c2}Steps', abbreviate(agent_steps, b2, c2)) s.add_row(f'{c2}SPS', abbreviate(sps, b2, c2)) s.add_row(f'{c2}Epoch', f'{b2}{self.epoch}') From 3699447d894562be1a78cbe128e27cce88a6d72e Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 24 Oct 2025 21:09:10 +0000 Subject: [PATCH 078/188] Cosine anneal lr --- pufferlib/extensions/pufferlib.cpp | 24 +++++++++++++++++------- pufferlib/pufferl.py | 21 +++++++++++---------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 3accd9edf..45ae37590 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -248,19 +248,30 @@ class PolicyLSTM : public torch::nn::Module { } }; +double cosine_annealing(double lr_base, int64_t t, int64_t T) { + if (T == 0) return lr_base; // avoid division by zero + double ratio = static_cast(t) / static_cast(T); + ratio = std::max(0.0, std::min(1.0, ratio)); // clamp to [0, 1] + return lr_base * 0.5 * (1 + std::cos(M_PI * ratio)); +} + typedef struct { PolicyLSTM* policy; torch::optim::Adam* optimizer; + double lr; + int64_t max_epochs; } PuffeRL; std::unique_ptr create_pufferl(int64_t input_size, - int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps) { + int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps, int64_t max_epochs) { auto policy = new PolicyLSTM(input_size, num_atns, hidden_size); auto optimizer = new 
torch::optim::Adam(policy->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); auto pufferl = std::make_unique(); pufferl->policy = policy; pufferl->optimizer = optimizer; + pufferl->lr = lr; + pufferl->max_epochs = max_epochs; return pufferl; } @@ -336,7 +347,6 @@ pybind11::dict compiled_train( torch::Tensor truncations, // [num_envs, horizon] float (included but not used in loop) torch::Tensor ratio, // [num_envs, horizon] float torch::Tensor values, // [num_envs, horizon] float - //pybind11::object scheduler, int64_t total_minibatches, int64_t minibatch_segments, int64_t segments, // num_envs @@ -362,6 +372,11 @@ pybind11::dict compiled_train( auto& policy = pufferl.policy; auto& optimizer = pufferl.optimizer; + if (anneal_lr) { + double lr = cosine_annealing(pufferl.lr, current_epoch, pufferl.max_epochs); + optimizer->param_groups().at(0).options().set_lr(lr); + } + // Compute anneal_beta double anneal_beta = prio_beta0 + (1.0 - prio_beta0) * prio_alpha * static_cast(current_epoch) / total_epochs; @@ -470,11 +485,6 @@ pybind11::dict compiled_train( } } - // Scheduler step if anneal_lr - //if (anneal_lr) { - // scheduler.attr("step")(); - //} - pybind11::dict losses; auto num_mb = static_cast(total_minibatches); losses["pg_loss"] = (pg_sum / num_mb).item(); diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 2ac9dab20..c45f4b069 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -203,6 +203,16 @@ def __init__(self, config, logger=None, verbose=True): else: raise ValueError(f'Unknown optimizer: {config["optimizer"]}') ''' + # Logging + self.logger = logger + if logger is None: + self.logger = Logger(config) + + # Learning rate scheduler + epochs = max(1, config['total_timesteps'] // config['batch_size']) + #self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) + self.total_epochs = epochs + self.pufferl_cpp = _C.create_pufferl( grid_size*grid_size, 5, @@ -211,19 +221,10 @@ def 
__init__(self, config, logger=None, verbose=True): config['adam_beta1'], config['adam_beta2'], config['adam_eps'], + epochs, ) self.pufferl_cpp.policy.cuda() - # Logging - self.logger = logger - if logger is None: - self.logger = Logger(config) - - # Learning rate scheduler - epochs = max(1, config['total_timesteps'] // config['batch_size']) - #self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) - self.total_epochs = epochs - # Automatic mixed precision precision = config['precision'] self.amp_context = contextlib.nullcontext() From 8f64e121bd9e12e7c4b5495af5f10d613d721016 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 24 Oct 2025 21:50:04 +0000 Subject: [PATCH 079/188] bf16. It is slower. Will fix after --- pufferlib/extensions/pufferlib.cpp | 87 +++++++++++++++++++----------- pufferlib/pufferl.py | 2 - 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 45ae37590..c5c658ba9 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -174,7 +174,7 @@ class PolicyLSTM : public torch::nn::Module { "c must be [B, hidden_size]"); } - auto hidden = encoder->forward(observations.to(torch::kFloat32)); + auto hidden = encoder->forward(observations); std::tuple cell_out; if (h.defined() && h.numel() > 0) { @@ -221,7 +221,7 @@ class PolicyLSTM : public torch::nn::Module { TT = 1; } - auto hidden = encoder->forward(x.to(torch::kFloat32)); + auto hidden = encoder->forward(x); hidden = hidden.reshape({B, TT, hidden_size_}); hidden = hidden.transpose(0, 1); // [TT, B, hidden_size] @@ -234,7 +234,6 @@ class PolicyLSTM : public torch::nn::Module { } hidden = std::get<0>(lstm_out); - hidden = hidden.to(torch::kFloat32); hidden = hidden.transpose(0, 1); // [B, TT, hidden_size] auto flat_hidden = hidden.reshape({-1, hidden_size_}); @@ -255,8 +254,17 @@ double cosine_annealing(double lr_base, int64_t t, int64_t T) { return 
lr_base * 0.5 * (1 + std::cos(M_PI * ratio)); } +void sync_fp16_fp32(pufferlib::PolicyLSTM* policy_16, pufferlib::PolicyLSTM* policy_32) { + auto params_32 = policy_32->parameters(); + auto params_16 = policy_16->parameters(); + for (size_t i = 0; i < params_32.size(); ++i) { + params_16[i].copy_(params_32[i].to(torch::kBFloat16)); + } +} + typedef struct { - PolicyLSTM* policy; + PolicyLSTM* policy_16; + PolicyLSTM* policy_32; torch::optim::Adam* optimizer; double lr; int64_t max_epochs; @@ -264,11 +272,18 @@ typedef struct { std::unique_ptr create_pufferl(int64_t input_size, int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps, int64_t max_epochs) { - auto policy = new PolicyLSTM(input_size, num_atns, hidden_size); - auto optimizer = new torch::optim::Adam(policy->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); + auto policy_16 = new PolicyLSTM(input_size, num_atns, hidden_size); + policy_16->to(torch::kCUDA); + policy_16->to(torch::kBFloat16); + + auto policy_32 = new PolicyLSTM(input_size, num_atns, hidden_size); + policy_32->to(torch::kCUDA); + + auto optimizer = new torch::optim::Adam(policy_16->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); auto pufferl = std::make_unique(); - pufferl->policy = policy; + pufferl->policy_16 = policy_16; + pufferl->policy_32 = policy_32; pufferl->optimizer = optimizer; pufferl->lr = lr; pufferl->max_epochs = max_epochs; @@ -279,12 +294,12 @@ std::unique_ptr create_pufferl(int64_t input_size, std::tuple compiled_evaluate( pybind11::object pufferl_obj, torch::Tensor envs_tensor, - torch::Tensor obs, - torch::Tensor actions, - torch::Tensor rewards, - torch::Tensor terminals, - torch::Tensor lstm_h, - torch::Tensor lstm_c, + torch::Tensor obs_input, + torch::Tensor actions_input, + torch::Tensor rewards_input, + torch::Tensor terminals_input, + torch::Tensor lstm_h_input, + torch::Tensor lstm_c_input, torch::Tensor obs_buffer, 
torch::Tensor act_buffer, torch::Tensor logprob_buffer, @@ -295,12 +310,19 @@ std::tuple compiled_evaluate( int64_t num_envs ) { auto& pufferl = pufferl_obj.cast(); - auto& policy = pufferl.policy; + auto& policy = pufferl.policy_16; auto& optimizer = pufferl.optimizer; // No-grad guard torch::NoGradGuard no_grad; + auto obs = obs_input.to(torch::kBFloat16); + auto actions = actions_input.to(torch::kBFloat16); + auto rewards = rewards_input.to(torch::kBFloat16); + auto terminals = terminals_input.to(torch::kBFloat16); + auto lstm_h = lstm_h_input.to(torch::kBFloat16); + auto lstm_c = lstm_c_input.to(torch::kBFloat16); + for (int64_t i = 0; i < horizon; ++i) { // Clamp rewards auto r = rewards.clamp(-1.0f, 1.0f); @@ -321,12 +343,12 @@ std::tuple compiled_evaluate( action = action.squeeze(1); // Store to buffers - obs_buffer.select(1, i).copy_(obs); - act_buffer.select(1, i).copy_(action); - logprob_buffer.select(1, i).copy_(logprob); - rew_buffer.select(1, i).copy_(r); + obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32)); + act_buffer.select(1, i).copy_(action.to(torch::kInt32)); + logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32)); + rew_buffer.select(1, i).copy_(r.to(torch::kFloat32)); term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32)); - val_buffer.select(1, i).copy_(value.flatten()); + val_buffer.select(1, i).copy_(value.flatten().to(torch::kFloat32)); // Step the environments actions.copy_(action); @@ -369,7 +391,7 @@ pybind11::dict compiled_train( int64_t current_epoch ) { auto& pufferl = pufferl_obj.cast(); - auto& policy = pufferl.policy; + auto& policy_16 = pufferl.policy_16; auto& optimizer = pufferl.optimizer; if (anneal_lr) { @@ -402,15 +424,15 @@ pybind11::dict compiled_train( auto mb_prio = torch::pow(static_cast(segments) * prio_probs_mb, -anneal_beta); // Select minibatch tensors - auto mb_obs = observations.index_select(0, idx); + auto mb_obs = observations.index_select(0, idx).to(torch::kBFloat16); auto mb_actions = 
actions.index_select(0, idx); - auto mb_logprobs = logprobs.index_select(0, idx); - auto mb_rewards = rewards.index_select(0, idx); - auto mb_terminals = terminals.index_select(0, idx); - auto mb_ratio = ratio.index_select(0, idx); // Not used directly - auto mb_values = values.index_select(0, idx); - auto mb_advantages = advantages.index_select(0, idx); - auto mb_returns = mb_advantages + mb_values; + auto mb_logprobs = logprobs.index_select(0, idx).to(torch::kBFloat16); + auto mb_rewards = rewards.index_select(0, idx).to(torch::kBFloat16); + auto mb_terminals = terminals.index_select(0, idx).to(torch::kBFloat16); + auto mb_ratio = ratio.index_select(0, idx).to(torch::kBFloat16); // Not used directly + auto mb_values = values.index_select(0, idx).to(torch::kBFloat16); + auto mb_advantages = advantages.index_select(0, idx).to(torch::kBFloat16); + auto mb_returns = mb_advantages + mb_values.to(torch::kBFloat16); auto original_obs_shape = mb_obs.sizes(); // [minibatch_segments, horizon, grid_size, grid_size] if (!use_rnn) { @@ -422,7 +444,7 @@ pybind11::dict compiled_train( torch::Tensor mb_lstm_c; // Policy forward: Native C++ call - auto [logits, newvalue] = policy->forward_train(mb_obs, mb_lstm_h, mb_lstm_c); + auto [logits, newvalue] = policy_16->forward_train(mb_obs, mb_lstm_h, mb_lstm_c); // Compute newlogprob and entropy (discrete assumption) auto flat_batch = minibatch_segments * horizon; @@ -441,7 +463,7 @@ pybind11::dict compiled_train( auto mb_ratio_new = logratio.exp(); // Update full ratio (detach) - ratio.index_copy_(0, idx, mb_ratio_new.detach()); + ratio.index_copy_(0, idx, mb_ratio_new.detach().to(torch::kFloat32)); // Advantages normalization auto adv = mb_advantages; @@ -479,7 +501,7 @@ pybind11::dict compiled_train( // Accumulate and step if ((mb + 1) % accumulate_minibatches == 0) { - torch::nn::utils::clip_grad_norm_(policy->parameters(), max_grad_norm); + torch::nn::utils::clip_grad_norm_(policy_16->parameters(), max_grad_norm); 
optimizer->step(); optimizer->zero_grad(); } @@ -505,7 +527,8 @@ PYBIND11_MODULE(_C, m) { m.def("create_pufferl", &create_pufferl); py::class_>(m, "PuffeRL") - .def_readwrite("policy", &pufferlib::PuffeRL::policy) + .def_readwrite("policy_16", &pufferlib::PuffeRL::policy_16) + .def_readwrite("policy_32", &pufferlib::PuffeRL::policy_32) .def_readwrite("optimizer", &pufferlib::PuffeRL::optimizer); py::class_, torch::nn::Module> cls(m, "PolicyLSTM"); diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index c45f4b069..5bb721bc6 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -223,8 +223,6 @@ def __init__(self, config, logger=None, verbose=True): config['adam_eps'], epochs, ) - self.pufferl_cpp.policy.cuda() - # Automatic mixed precision precision = config['precision'] self.amp_context = contextlib.nullcontext() From bcef4864ed2cf9a06a6a03d8d2098475c9f73470 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 27 Oct 2025 16:07:33 +0000 Subject: [PATCH 080/188] Add logs --- pufferlib/extensions/cuda/squared_torch.cu | 40 ++++++++++-- pufferlib/extensions/pufferlib.cpp | 72 ++++++++++++++++------ pufferlib/pufferl.py | 14 +++++ 3 files changed, 104 insertions(+), 22 deletions(-) diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu index b32f131fd..f35f6a6aa 100644 --- a/pufferlib/extensions/cuda/squared_torch.cu +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -70,7 +70,6 @@ __device__ void cuda_step(Squared* env) { env->tick += 1; int action = env->actions[0]; env->terminals[0] = 0; - /* env->rewards[0] = 0.0f; int pos = env->r * env->size + env->c; @@ -90,8 +89,8 @@ __device__ void cuda_step(Squared* env) { pos = env->r * env->size + env->c; // Check bounds and timeout - if (env->r < 0 || env->c < 0 || env->r >= env->size || env->c >= env->size || - env->tick > 3 * env->size) { + if (env->r < 0 || env->c < 0 || env->r >= env->size + || env->c >= env->size || env->tick > 3 * env->size) { 
env->terminals[0] = 1; env->rewards[0] = -1.0f; env->log.perf += 0; @@ -118,7 +117,6 @@ __device__ void cuda_step(Squared* env) { // Place agent env->observations[pos] = AGENT; - */ } // Kernel: Step all environments @@ -136,6 +134,14 @@ __global__ void reset_environments(Squared* envs, int* indices, int num_reset) { cuda_reset(&envs[env_idx], &envs[env_idx].rng); } +// Kernel: Reset specific environment logs +__global__ void reset_logs(Squared* envs, int* indices, int num_reset) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= num_reset) return; + int env_idx = indices[idx]; + envs[env_idx].log = {0}; +} + // Kernel: Initialize all environments __global__ void init_environments(Squared* envs, unsigned char* obs_mem, @@ -221,4 +227,30 @@ void reset_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_te cudaDeviceSynchronize(); } +Log log_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_tensor) { + Squared* envs = reinterpret_cast(envs_tensor.cpu().data_ptr()); + auto indices = indices_tensor.cpu().data_ptr(); + int num_log = indices_tensor.size(0); + Log log = {0}; + for (int i=0; i(envs_tensor.data_ptr()); + auto indices_gpu = indices_tensor.data_ptr(); + reset_logs<<>>(envs_gpu, indices_gpu, num_log); + cudaDeviceSynchronize(); + + return log; +} + } diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index c5c658ba9..51bdaa952 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -80,10 +80,20 @@ TORCH_LIBRARY_IMPL(pufferlib, CPU, m) { std::tuple create_squared_environments(int64_t num_envs, int64_t grid_size, torch::Tensor dummy); +struct Log { + float perf; + float score; + float episode_return; + float episode_length; + float n; +}; + void step_environments_cuda(torch::Tensor envs_tensor, int64_t num_envs); void reset_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_tensor); +Log log_environments_cuda(torch::Tensor 
envs_tensor, torch::Tensor indices_tensor); + void compute_puff_advantage_cuda( torch::Tensor values, torch::Tensor rewards, @@ -258,7 +268,7 @@ void sync_fp16_fp32(pufferlib::PolicyLSTM* policy_16, pufferlib::PolicyLSTM* pol auto params_32 = policy_32->parameters(); auto params_16 = policy_16->parameters(); for (size_t i = 0; i < params_32.size(); ++i) { - params_16[i].copy_(params_32[i].to(torch::kBFloat16)); + params_16[i].copy_(params_32[i].to(torch::kFloat32)); } } @@ -272,14 +282,30 @@ typedef struct { std::unique_ptr create_pufferl(int64_t input_size, int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps, int64_t max_epochs) { + + // Enable cuDNN benchmarking + torch::globalContext().setBenchmarkCuDNN(true); + torch::globalContext().setDeterministicCuDNN(false); + torch::globalContext().setBenchmarkLimitCuDNN(32); + + // Enable TF32 for faster FP32 math (uses Tensor Cores on 4090) + torch::globalContext().setAllowTF32CuBLAS(true); + torch::globalContext().setAllowTF32CuDNN(true); + + // Enable faster FP16 reductions + torch::globalContext().setAllowFP16ReductionCuBLAS(true); + + // BF16 reduction (if using bfloat16) + torch::globalContext().setAllowBF16ReductionCuBLAS(true); + auto policy_16 = new PolicyLSTM(input_size, num_atns, hidden_size); policy_16->to(torch::kCUDA); - policy_16->to(torch::kBFloat16); + policy_16->to(torch::kFloat32); auto policy_32 = new PolicyLSTM(input_size, num_atns, hidden_size); policy_32->to(torch::kCUDA); - auto optimizer = new torch::optim::Adam(policy_16->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); + auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); auto pufferl = std::make_unique(); pufferl->policy_16 = policy_16; @@ -316,12 +342,12 @@ std::tuple compiled_evaluate( // No-grad guard torch::NoGradGuard no_grad; - auto obs = obs_input.to(torch::kBFloat16); - auto actions = 
actions_input.to(torch::kBFloat16); - auto rewards = rewards_input.to(torch::kBFloat16); - auto terminals = terminals_input.to(torch::kBFloat16); - auto lstm_h = lstm_h_input.to(torch::kBFloat16); - auto lstm_c = lstm_c_input.to(torch::kBFloat16); + auto obs = obs_input.to(torch::kFloat32); + auto actions = actions_input.to(torch::kFloat32); + auto rewards = rewards_input.to(torch::kFloat32); + auto terminals = terminals_input.to(torch::kFloat32); + auto lstm_h = lstm_h_input.to(torch::kFloat32); + auto lstm_c = lstm_c_input.to(torch::kFloat32); for (int64_t i = 0; i < horizon; ++i) { // Clamp rewards @@ -392,6 +418,7 @@ pybind11::dict compiled_train( ) { auto& pufferl = pufferl_obj.cast(); auto& policy_16 = pufferl.policy_16; + auto& policy_32 = pufferl.policy_32; auto& optimizer = pufferl.optimizer; if (anneal_lr) { @@ -424,15 +451,15 @@ pybind11::dict compiled_train( auto mb_prio = torch::pow(static_cast(segments) * prio_probs_mb, -anneal_beta); // Select minibatch tensors - auto mb_obs = observations.index_select(0, idx).to(torch::kBFloat16); + auto mb_obs = observations.index_select(0, idx).to(torch::kFloat32); auto mb_actions = actions.index_select(0, idx); - auto mb_logprobs = logprobs.index_select(0, idx).to(torch::kBFloat16); - auto mb_rewards = rewards.index_select(0, idx).to(torch::kBFloat16); - auto mb_terminals = terminals.index_select(0, idx).to(torch::kBFloat16); - auto mb_ratio = ratio.index_select(0, idx).to(torch::kBFloat16); // Not used directly - auto mb_values = values.index_select(0, idx).to(torch::kBFloat16); - auto mb_advantages = advantages.index_select(0, idx).to(torch::kBFloat16); - auto mb_returns = mb_advantages + mb_values.to(torch::kBFloat16); + auto mb_logprobs = logprobs.index_select(0, idx).to(torch::kFloat32); + auto mb_rewards = rewards.index_select(0, idx).to(torch::kFloat32); + auto mb_terminals = terminals.index_select(0, idx).to(torch::kFloat32); + auto mb_ratio = ratio.index_select(0, idx).to(torch::kFloat32); // Not used 
directly + auto mb_values = values.index_select(0, idx).to(torch::kFloat32); + auto mb_advantages = advantages.index_select(0, idx).to(torch::kFloat32); + auto mb_returns = mb_advantages + mb_values.to(torch::kFloat32); auto original_obs_shape = mb_obs.sizes(); // [minibatch_segments, horizon, grid_size, grid_size] if (!use_rnn) { @@ -444,7 +471,8 @@ pybind11::dict compiled_train( torch::Tensor mb_lstm_c; // Policy forward: Native C++ call - auto [logits, newvalue] = policy_16->forward_train(mb_obs, mb_lstm_h, mb_lstm_c); + auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32), mb_lstm_h, mb_lstm_c); + //auto [logits, newvalue] = policy_16->forward_train(mb_obs, mb_lstm_h, mb_lstm_c); // Compute newlogprob and entropy (discrete assumption) auto flat_batch = minibatch_segments * horizon; @@ -522,9 +550,17 @@ PYBIND11_MODULE(_C, m) { m.def("create_squared_environments", &create_squared_environments); m.def("step_environments", &step_environments_cuda); m.def("reset_environments", &reset_environments_cuda); + m.def("log_environments", &log_environments_cuda); m.def("compiled_evaluate", &compiled_evaluate); m.def("compiled_train", &compiled_train); + py::class_(m, "Log") + .def_readwrite("perf", &Log::perf) + .def_readwrite("score", &Log::score) + .def_readwrite("episode_return", &Log::episode_return) + .def_readwrite("episode_length", &Log::episode_length) + .def_readwrite("n", &Log::n); + m.def("create_pufferl", &create_pufferl); py::class_>(m, "PuffeRL") .def_readwrite("policy_16", &pufferlib::PuffeRL::policy_16) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 5bb721bc6..3e2450b84 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -59,6 +59,13 @@ def step(self, actions): _C.step_environments(self.env) return self.observations, self.rewards, self.terminals + def log(self): + return _C.log_environments(self.env, self.indices) + + def close(self): + # TODO + pass + import rich import rich.traceback from rich.table 
import Table @@ -326,6 +333,13 @@ def evaluate(self): self.num_envs # Or wherever num_envs is stored ) + logs = self.vecenv.log() + self.stats['perf'] = [logs.perf] + self.stats['score'] = [logs.score] + self.stats['episode_return'] = [logs.episode_return] + self.stats['episode_length'] = [logs.episode_length] + self.stats['n'] = [logs.n] + self.global_step += config['batch_size'] profile.end() return self.stats From c68c4c6ba8656f872ccfffde9b25527b5d5f645f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Mon, 27 Oct 2025 20:54:07 +0000 Subject: [PATCH 081/188] Progress on port -- need a full matching test from python to Cpp for this to work --- pufferlib/config/ocean/squared.ini | 2 +- pufferlib/extensions/cuda/squared_torch.cu | 4 +- pufferlib/extensions/pufferlib.cpp | 257 ++-- pufferlib/models.py | 157 +-- pufferlib/ocean/squared/squared.h | 4 +- pufferlib/pufferl.py | 285 +--- pufferlib/python_pufferl.py | 1429 ++++++++++++++++++++ test_squared_torch.py | 77 +- 8 files changed, 1707 insertions(+), 508 deletions(-) create mode 100644 pufferlib/python_pufferl.py diff --git a/pufferlib/config/ocean/squared.ini b/pufferlib/config/ocean/squared.ini index 5629776a3..d13c3e9f9 100644 --- a/pufferlib/config/ocean/squared.ini +++ b/pufferlib/config/ocean/squared.ini @@ -16,5 +16,5 @@ num_envs = 4096 [train] total_timesteps = 20_000_000 gamma = 0.95 -learning_rate = 0.05 +learning_rate = 0.01 minibatch_size = 32768 diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu index f35f6a6aa..9fd384a0a 100644 --- a/pufferlib/extensions/cuda/squared_torch.cu +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -57,10 +57,12 @@ __device__ void cuda_reset(Squared* env, curandState* rng) { env->tick = 0; // Place target randomly (not on agent) - int target_idx; + int target_idx = center; // Deterministic for testing + /* do { target_idx = curand(rng) % tiles; } while (target_idx == center); + */ env->observations[target_idx] = TARGET; 
} diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 51bdaa952..1150a6e76 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -154,13 +154,9 @@ class PolicyLSTM : public torch::nn::Module { // Initialization auto encoder_linear = (*encoder)[0]->as(); torch::nn::init::orthogonal_(encoder_linear->weight, std::sqrt(2.0)); - encoder_linear->bias.data().zero_(); - - torch::nn::init::orthogonal_(decoder->weight, 0.01); - decoder->bias.data().zero_(); - - torch::nn::init::orthogonal_(value->weight, 1.0); - value->bias.data().zero_(); + torch::nn::init::constant_(encoder_linear->bias, 0.0); + torch::nn::init::orthogonal_(decoder->weight, std::sqrt(0.01)); + torch::nn::init::constant_(decoder->bias, 0.0); torch::nn::init::orthogonal_(lstm->named_parameters()["weight_ih_l0"], 1.0); torch::nn::init::orthogonal_(lstm->named_parameters()["weight_hh_l0"], 1.0); @@ -298,6 +294,9 @@ std::unique_ptr create_pufferl(int64_t input_size, // BF16 reduction (if using bfloat16) torch::globalContext().setAllowBF16ReductionCuBLAS(true); + // Random seed + torch::manual_seed(42); + auto policy_16 = new PolicyLSTM(input_size, num_atns, hidden_size); policy_16->to(torch::kCUDA); policy_16->to(torch::kFloat32); @@ -320,12 +319,12 @@ std::unique_ptr create_pufferl(int64_t input_size, std::tuple compiled_evaluate( pybind11::object pufferl_obj, torch::Tensor envs_tensor, - torch::Tensor obs_input, - torch::Tensor actions_input, - torch::Tensor rewards_input, - torch::Tensor terminals_input, - torch::Tensor lstm_h_input, - torch::Tensor lstm_c_input, + torch::Tensor obs, + torch::Tensor actions, + torch::Tensor rewards, + torch::Tensor terminals, + torch::Tensor lstm_h, + torch::Tensor lstm_c, torch::Tensor obs_buffer, torch::Tensor act_buffer, torch::Tensor logprob_buffer, @@ -336,68 +335,51 @@ std::tuple compiled_evaluate( int64_t num_envs ) { auto& pufferl = pufferl_obj.cast(); - auto& policy = pufferl.policy_16; - 
auto& optimizer = pufferl.optimizer; + auto& policy = pufferl.policy_32; - // No-grad guard torch::NoGradGuard no_grad; - auto obs = obs_input.to(torch::kFloat32); - auto actions = actions_input.to(torch::kFloat32); - auto rewards = rewards_input.to(torch::kFloat32); - auto terminals = terminals_input.to(torch::kFloat32); - auto lstm_h = lstm_h_input.to(torch::kFloat32); - auto lstm_c = lstm_c_input.to(torch::kFloat32); - for (int64_t i = 0; i < horizon; ++i) { - // Clamp rewards - auto r = rewards.clamp(-1.0f, 1.0f); - - // Policy forward: Native C++ call - auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs, lstm_h, lstm_c); + auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32), lstm_h, lstm_c); lstm_h = lstm_h_out; lstm_c = lstm_c_out; - // Sample action and logprob (assuming discrete categorical from logits) - auto max_logits = logits.amax(1, true); - auto logits_shifted = logits - max_logits; - auto logsumexp = logits_shifted.exp().sum(1, true).log() + max_logits; - auto logprobs = logits - logsumexp; - auto probs = logprobs.exp(); - auto action = at::multinomial(probs, 1, /*replacement=*/true); - auto logprob = logprobs.gather(1, action).squeeze(1); - action = action.squeeze(1); - - // Store to buffers + auto logprobs = torch::log_softmax(logits, 1); + auto action = at::multinomial(logprobs.exp(), 1, true).squeeze(1); + auto logprob = logprobs.gather(1, action.unsqueeze(1)).squeeze(1); + + // Store obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32)); act_buffer.select(1, i).copy_(action.to(torch::kInt32)); logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32)); - rew_buffer.select(1, i).copy_(r.to(torch::kFloat32)); + rew_buffer.select(1, i).copy_(rewards.to(torch::kFloat32)); term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32)); val_buffer.select(1, i).copy_(value.flatten().to(torch::kFloat32)); - // Step the environments actions.copy_(action); - step_environments_cuda(envs_tensor, 
num_envs); + { + pybind11::gil_scoped_release no_gil; + step_environments_cuda(envs_tensor, num_envs); + } + rewards.clamp_(-1.0f, 1.0f); } return std::make_tuple(lstm_h, lstm_c); } -// Updated compiled_train pybind11::dict compiled_train( pybind11::object pufferl_obj, - torch::Tensor observations, // [num_envs, horizon, grid_size, grid_size] uint8 - torch::Tensor actions, // [num_envs, horizon] int32 - torch::Tensor logprobs, // [num_envs, horizon] float - torch::Tensor rewards, // [num_envs, horizon] float - torch::Tensor terminals, // [num_envs, horizon] float - torch::Tensor truncations, // [num_envs, horizon] float (included but not used in loop) - torch::Tensor ratio, // [num_envs, horizon] float - torch::Tensor values, // [num_envs, horizon] float + torch::Tensor observations, // [num_envs, horizon, ...] + torch::Tensor actions, // [num_envs, horizon] + torch::Tensor logprobs, // [num_envs, horizon] + torch::Tensor rewards, // [num_envs, horizon] + torch::Tensor terminals_input, // [num_envs, horizon] + torch::Tensor truncations, // [num_envs, horizon] (not used in puff advantage?) 
+ torch::Tensor ratio, // [num_envs, horizon] + torch::Tensor values, // [num_envs, horizon] int64_t total_minibatches, int64_t minibatch_segments, - int64_t segments, // num_envs + int64_t segments, int64_t accumulate_minibatches, int64_t horizon, double prio_beta0, @@ -417,89 +399,102 @@ pybind11::dict compiled_train( int64_t current_epoch ) { auto& pufferl = pufferl_obj.cast(); - auto& policy_16 = pufferl.policy_16; auto& policy_32 = pufferl.policy_32; auto& optimizer = pufferl.optimizer; + auto device = values.device(); + auto terminals = terminals_input.to(torch::kFloat32); + if (anneal_lr) { double lr = cosine_annealing(pufferl.lr, current_epoch, pufferl.max_epochs); optimizer->param_groups().at(0).options().set_lr(lr); } - // Compute anneal_beta + // Annealed priority exponent double anneal_beta = prio_beta0 + (1.0 - prio_beta0) * prio_alpha * static_cast(current_epoch) / total_epochs; - // Compute advantages - auto advantages = torch::zeros_like(values); - compute_puff_advantage_cuda(values, rewards, terminals, ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip); + // Zero out ratio at start of epoch (matches Python: self.ratio[:] = 1) + ratio.fill_(1.0); - // Prioritize - auto adv = advantages.abs().sum(1); - auto prio_weights = adv.pow(prio_alpha).nan_to_num_(0.0, 0.0, 0.0); - auto sum_weights = prio_weights.sum() + static_cast(adv.size(0)) * 1e-6; - auto prio_probs = (prio_weights + 1e-6) / sum_weights; + // Accumulators + torch::Tensor pg_sum = torch::zeros({}, torch::kFloat32).to(device); + torch::Tensor v_sum = torch::zeros({}, torch::kFloat32).to(device); + torch::Tensor ent_sum = torch::zeros({}, torch::kFloat32).to(device); + torch::Tensor total_sum = torch::zeros({}, torch::kFloat32).to(device); + torch::Tensor old_approx_kl_sum = torch::zeros({}, torch::kFloat32).to(device); + torch::Tensor approx_kl_sum = torch::zeros({}, torch::kFloat32).to(device); + torch::Tensor clipfrac_sum = torch::zeros({}, torch::kFloat32).to(device); + 
torch::Tensor importance_sum = torch::zeros({}, torch::kFloat32).to(device); - auto device = values.device(); - auto pg_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - auto v_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - auto ent_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); - auto total_sum = torch::zeros({}, torch::TensorOptions().dtype(torch::kFloat32).device(device)); + optimizer->zero_grad(); // Start with clean grads + auto advantages = torch::zeros_like(values); for (int64_t mb = 0; mb < total_minibatches; ++mb) { - auto idx = at::multinomial(prio_probs, minibatch_segments, /*replacement=*/true); - auto prio_probs_mb = prio_probs.index_select(0, idx).unsqueeze(1); - auto mb_prio = torch::pow(static_cast(segments) * prio_probs_mb, -anneal_beta); - - // Select minibatch tensors - auto mb_obs = observations.index_select(0, idx).to(torch::kFloat32); - auto mb_actions = actions.index_select(0, idx); - auto mb_logprobs = logprobs.index_select(0, idx).to(torch::kFloat32); - auto mb_rewards = rewards.index_select(0, idx).to(torch::kFloat32); - auto mb_terminals = terminals.index_select(0, idx).to(torch::kFloat32); - auto mb_ratio = ratio.index_select(0, idx).to(torch::kFloat32); // Not used directly - auto mb_values = values.index_select(0, idx).to(torch::kFloat32); - auto mb_advantages = advantages.index_select(0, idx).to(torch::kFloat32); - auto mb_returns = mb_advantages + mb_values.to(torch::kFloat32); - - auto original_obs_shape = mb_obs.sizes(); // [minibatch_segments, horizon, grid_size, grid_size] + advantages = torch::zeros_like(values); + compute_puff_advantage_cuda( + values, rewards, terminals, ratio, + advantages, gamma, gae_lambda, + vtrace_rho_clip, vtrace_c_clip + ); + + // Prioritization + auto adv = advantages.abs().sum(1); // [num_envs] + auto prio_weights = adv.pow(prio_alpha).nan_to_num_(0.0, 0.0, 0.0); + auto prio_probs = 
(prio_weights + 1e-6)/(prio_weights.sum() + 1e-6); + auto idx = at::multinomial(prio_probs, minibatch_segments); // #Replacement? + auto mb_prio = torch::pow(segments*prio_probs.index_select(0, idx).unsqueeze(1), -anneal_beta); + + // Index into data + torch::Tensor mb_obs = observations.index_select(0, idx); + torch::Tensor mb_actions = actions.index_select(0, idx); + torch::Tensor mb_logprobs = logprobs.index_select(0, idx); + torch::Tensor mb_values = values.index_select(0, idx); + torch::Tensor mb_advantages = advantages.index_select(0, idx); + torch::Tensor mb_returns = mb_advantages + mb_values; + + // Reshape obs if not using RNN if (!use_rnn) { - mb_obs = mb_obs.reshape({-1, original_obs_shape[2], original_obs_shape[3]}); + auto flat_shape = std::vector{-1, mb_obs.size(2), mb_obs.size(3)}; + mb_obs = mb_obs.reshape(flat_shape); } - // Initial LSTM states (undefined for zero init) - torch::Tensor mb_lstm_h; - torch::Tensor mb_lstm_c; + // HARDCODED LSTM SIZE 128 + // Initial LSTM states (zero or none) + torch::Tensor mb_lstm_h = torch::zeros( + {1, minibatch_segments, 128}, + torch::kFloat32 + ).to(device); + torch::Tensor mb_lstm_c = torch::zeros_like(mb_lstm_h); - // Policy forward: Native C++ call + // Forward pass auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32), mb_lstm_h, mb_lstm_c); - //auto [logits, newvalue] = policy_16->forward_train(mb_obs, mb_lstm_h, mb_lstm_c); - - // Compute newlogprob and entropy (discrete assumption) - auto flat_batch = minibatch_segments * horizon; - auto flat_logits = logits.reshape({flat_batch, -1}); - auto flat_actions = mb_actions.reshape({flat_batch}); - auto max_logits = flat_logits.amax(1, true); - auto logits_shifted = flat_logits - max_logits; - auto logsumexp = (logits_shifted.exp().sum(1, true)).log() + max_logits; - auto logprobs = flat_logits - logsumexp; // Correct - auto probs = logprobs.exp(); - auto newlogprob_flat = logprobs.gather(1, flat_actions.unsqueeze(1)).squeeze(1); + + 
// Flatten for action lookup + auto flat_logits = logits.reshape({-1, logits.size(-1)}); + auto flat_actions = mb_actions.reshape({-1}); + auto logprobs_new = torch::log_softmax(flat_logits, /*dim=*/1); + auto probs_new = logprobs_new.exp(); + + // Gather logprobs for taken actions + auto newlogprob_flat = logprobs_new.gather(1, flat_actions.unsqueeze(1)).squeeze(1); auto newlogprob = newlogprob_flat.reshape({minibatch_segments, horizon}); - auto entropy = - (probs * logprobs).sum(1).mean(); + auto entropy = - (probs_new * logprobs_new).sum(1).mean(); // mean over batch + // Compute ratio auto logratio = newlogprob - mb_logprobs; - auto mb_ratio_new = logratio.exp(); + auto ratio_new = logratio.exp(); - // Update full ratio (detach) - ratio.index_copy_(0, idx, mb_ratio_new.detach().to(torch::kFloat32)); + // Update global ratio and values in-place (matches Python) + ratio.index_copy_(0, idx, ratio_new.detach().squeeze(-1).to(torch::kFloat32)); + values.index_copy_(0, idx, newvalue.detach().squeeze(-1).to(torch::kFloat32)); - // Advantages normalization - auto adv = mb_advantages; - adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8); + // Normalize advantages: (adv - mean) / std, then weight + auto adv_normalized = mb_advantages; + adv_normalized = mb_prio * (adv_normalized - adv_normalized.mean()) / (adv_normalized.std() + 1e-8); // Policy loss - auto pg_loss1 = -adv * mb_ratio_new; - auto pg_loss2 = -adv * torch::clamp(mb_ratio_new, 1.0 - clip_coef, 1.0 + clip_coef); + auto pg_loss1 = -adv_normalized * ratio_new; + auto pg_loss2 = -adv_normalized * torch::clamp(ratio_new, 1.0 - clip_coef, 1.0 + clip_coef); auto pg_loss = torch::max(pg_loss1, pg_loss2).mean(); // Value loss @@ -509,42 +504,68 @@ pybind11::dict compiled_train( auto v_loss_clipped = (v_clipped - mb_returns).pow(2); auto v_loss = 0.5 * torch::max(v_loss_unclipped, v_loss_clipped).mean(); - // Entropy loss + // Entropy auto entropy_loss = entropy; // Already mean // Total loss - auto loss = 
pg_loss + vf_coef * v_loss - ent_coef * entropy_loss; + auto loss = pg_loss + vf_coef*v_loss - ent_coef*entropy_loss; + + // Accumulate stats pg_sum += pg_loss.detach(); v_sum += v_loss.detach(); - ent_sum += entropy.detach(); + ent_sum += entropy_loss.detach(); total_sum += loss.detach(); + // KL and clipping diagnostics (matches Python) + { + torch::NoGradGuard no_grad; + auto old_kl = (-logratio).mean(); + auto kl = ((ratio_new - 1) - logratio).mean(); + auto cf = (ratio_new - 1.0).abs().gt(clip_coef).to(torch::kFloat32).mean(); + auto imp = ratio_new.mean(); + + old_approx_kl_sum += old_kl.detach(); + approx_kl_sum += kl.detach(); + clipfrac_sum += cf.detach(); + importance_sum += imp.detach(); + } + + // Backward pass { pybind11::gil_scoped_release no_gil; loss.backward(); } - // Update values - values.index_copy_(0, idx, newvalue.detach().to(torch::kFloat32)); - - // Accumulate and step + // Gradient accumulation and step if ((mb + 1) % accumulate_minibatches == 0) { - torch::nn::utils::clip_grad_norm_(policy_16->parameters(), max_grad_norm); + torch::nn::utils::clip_grad_norm_(policy_32->parameters(), max_grad_norm); optimizer->step(); optimizer->zero_grad(); } } + // Compute explained variance at end of epoch + auto y_true = advantages.flatten() + values.flatten(); + auto y_pred = values.flatten(); + auto var_y = y_true.var(); + //double explained_var = (var_y.abs() < 1e-8) ? 
NAN : (1 - (y_true - y_pred).var() / var_y).item(); + + // Return losses (averaged) pybind11::dict losses; auto num_mb = static_cast(total_minibatches); losses["pg_loss"] = (pg_sum / num_mb).item(); losses["v_loss"] = (v_sum / num_mb).item(); losses["entropy"] = (ent_sum / num_mb).item(); losses["total_loss"] = (total_sum / num_mb).item(); + losses["old_approx_kl"] = (old_approx_kl_sum / num_mb).item(); + losses["approx_kl"] = (approx_kl_sum / num_mb).item(); + losses["clipfrac"] = (clipfrac_sum / num_mb).item(); + losses["importance"] = (importance_sum / num_mb).item(); + //losses["explained_variance"] = explained_var; + return losses; } - // PYBIND11_MODULE with the extension name (pufferlib._C) PYBIND11_MODULE(_C, m) { m.def("create_squared_environments", &create_squared_environments); diff --git a/pufferlib/models.py b/pufferlib/models.py index d9f49fd01..2a1255cd7 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -8,7 +8,6 @@ import pufferlib.pytorch import pufferlib.spaces - class Default(nn.Module): '''Default PyTorch policy. Flattens obs and applies a linear layer. @@ -97,108 +96,6 @@ def decode_actions(self, hidden): values = self.value(hidden) return logits, values -class Default(nn.Module): - '''Default PyTorch policy. Flattens obs and applies a linear layer. - - PufferLib is not a framework. It does not enforce a base class. - You can use any PyTorch policy that returns actions and values. - We structure our forward methods as encode_observations and decode_actions - to make it easier to wrap policies with LSTMs. You can do that and use - our LSTM wrapper or implement your own. To port an existing policy - for use with our LSTM wrapper, simply put everything from forward() before - the recurrent cell into encode_observations and put everything after - into decode_actions. 
- ''' - def __init__(self, env, hidden_size=128): - super().__init__() - self.hidden_size = hidden_size - self.is_multidiscrete = isinstance(env.single_action_space, - pufferlib.spaces.MultiDiscrete) - self.is_continuous = isinstance(env.single_action_space, - pufferlib.spaces.Box) - - num_obs = int(np.prod(env.single_observation_space.shape)) - self.encoder = torch.nn.Sequential( - pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), - nn.GELU(), - ) - num_atns = int(env.single_action_space.n) - self.decoder = pufferlib.pytorch.layer_init( - nn.Linear(hidden_size, num_atns), std=0.01) - self.value = pufferlib.pytorch.layer_init( - nn.Linear(hidden_size, 1), std=1) - - def forward_eval(self, observations, state=None): - hidden = self.encode_observations(observations, state=state) - logits, values = self.decode_actions(hidden) - return logits, values - - def forward(self, observations, state=None): - return self.forward_eval(observations, state) - - def encode_observations(self, observations, state=None): - '''Encodes a batch of observations into hidden states. Assumes - no time dimension (handled by LSTM wrappers).''' - batch_size = observations.shape[0] - observations = observations.view(batch_size, -1) - return self.encoder(observations.float()) - - def decode_actions(self, hidden): - '''Decodes a batch of hidden states into (multi)discrete actions. - Assumes no time dimension (handled by LSTM wrappers).''' - logits = self.decoder(hidden) - values = self.value(hidden) - return logits, values - -def forward_train(policy, lstm, observations, state): - #Forward function for training. 
Uses LSTM for fast time-batching - x = observations - lstm_h = state['lstm_h'] - lstm_c = state['lstm_c'] - - x_shape, space_shape = x.shape, policy.obs_shape - x_n, space_n = len(x_shape), len(space_shape) - if x_shape[-space_n:] != space_shape: - raise ValueError('Invalid input tensor shape', x.shape) - - if x_n == space_n + 1: - B, TT = x_shape[0], 1 - elif x_n == space_n + 2: - B, TT = x_shape[:2] - else: - raise ValueError('Invalid input tensor shape', x.shape) - - if lstm_h is not None: - assert lstm_h.shape[1] == lstm_c.shape[1] == B, 'LSTM state must be (h, c)' - lstm_state = (lstm_h, lstm_c) - else: - lstm_state = None - - x = x.reshape(B*TT, *space_shape) - hidden = policy.policy.encode_observations(x, state) - assert hidden.shape == (B*TT, policy.input_size) - - hidden = hidden.reshape(B, TT, policy.input_size) - - hidden = hidden.transpose(0, 1) - #hidden = self.pre_layernorm(hidden) - hidden, (lstm_h, lstm_c) = lstm(hidden, lstm_state) - hidden = hidden.float() - - #hidden = self.post_layernorm(hidden) - hidden = hidden.transpose(0, 1) - - flat_hidden = hidden.reshape(B*TT, policy.hidden_size) - logits, values = policy.policy.decode_actions(flat_hidden) - values = values.reshape(B, TT) - #state.batch_logits = logits.reshape(B, TT, -1) - state['hidden'] = hidden - state['lstm_h'] = lstm_h.detach() - state['lstm_c'] = lstm_c.detach() - return logits, values - - - class LSTMWrapper(nn.Module): def __init__(self, env, policy, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the @@ -233,9 +130,11 @@ def __init__(self, env, policy, hidden_size=128): #self.pre_layernorm = nn.LayerNorm(hidden_size) #self.post_layernorm = nn.LayerNorm(hidden_size) - def forward(self, observations, h, c): + def forward_eval(self, observations, state): '''Forward function for inference. 
3x faster than using LSTM directly''' - hidden = self.policy.encode_observations(observations) + hidden = self.policy.encode_observations(observations, state=state) + h = state['lstm_h'] + c = state['lstm_c'] # TODO: Don't break compile if h is not None: @@ -244,11 +143,16 @@ def forward(self, observations, h, c): else: lstm_state = None + #hidden = self.pre_layernorm(hidden) hidden, c = self.cell(hidden, lstm_state) + #hidden = self.post_layernorm(hidden) + state['hidden'] = hidden + state['lstm_h'] = hidden + state['lstm_c'] = c logits, values = self.policy.decode_actions(hidden) - return logits, values, hidden, c + return logits, values - def forward_train(self, observations, state): + def forward(self, observations, state): '''Forward function for training. Uses LSTM for fast time-batching''' x = observations lstm_h = state['lstm_h'] @@ -295,45 +199,6 @@ def forward_train(self, observations, state): state['lstm_c'] = lstm_c.detach() return logits, values -''' -class LSTMWrapper(nn.Module): - def __init__(self, env, policy, hidden_size=128): - super().__init__() - self.obs_shape = env.single_observation_space.shape - input_size = hidden_size - - self.policy = policy - self.input_size = input_size - self.hidden_size = hidden_size - self.is_continuous = self.policy.is_continuous - - self.cell = sLSTM(input_size, hidden_size, 4) - - #self.cell = torch.nn.LSTMCell(input_size, hidden_size) - #self.cell.weight_ih = self.lstm.weight_ih_l0 - #self.cell.weight_hh = self.lstm.weight_hh_l0 - #self.cell.bias_ih = self.lstm.bias_ih_l0 - #self.cell.bias_hh = self.lstm.bias_hh_l0 - - #self.pre_layernorm = nn.LayerNorm(hidden_size) - #self.post_layernorm = nn.LayerNorm(hidden_size) - - def forward(self, observations, c, n, h, m): - hidden = self.policy.encode_observations(observations) - - # TODO: Don't break compile - if h is not None: - assert h.shape[0] == c.shape[0] == observations.shape[0], 'LSTM state must be (h, c)' - lstm_state = (h, c) - else: - lstm_state = None - - 
hidden, c = self.cell(hidden, lstm_state) - logits, values = self.policy.decode_actions(hidden) - return logits, values, hidden, c -''' - - class Convolutional(nn.Module): def __init__(self, env, *args, framestack, flat_size, input_size=512, hidden_size=512, output_size=512, diff --git a/pufferlib/ocean/squared/squared.h b/pufferlib/ocean/squared/squared.h index 512d95d25..be57ea75c 100644 --- a/pufferlib/ocean/squared/squared.h +++ b/pufferlib/ocean/squared/squared.h @@ -58,10 +58,12 @@ void c_reset(Squared* env) { env->r = env->size/2; env->c = env->size/2; env->tick = 0; - int target_idx; + int target_idx = tiles/2; // Deterministic for testing + /* do { target_idx = rand() % tiles; } while (target_idx == tiles/2); + */ env->observations[target_idx] = TARGET; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 3e2450b84..ff4d0e9b7 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -30,7 +30,6 @@ import torch.utils.cpp_extension import pufferlib -from pufferlib.models import forward_train import pufferlib.sweep import pufferlib.vector import pufferlib.pytorch @@ -83,6 +82,7 @@ def close(self): #torch.autograd.set_detect_anomaly(True) #torch._dynamo.config.capture_scalar_outputs = True + class PuffeRL: #def __init__(self, config, vecenv, policy, logger=None, verbose=True): #def __init__(self, config, policy, logger=None, verbose=True): @@ -94,9 +94,9 @@ def __init__(self, config, logger=None, verbose=True): # Reproducibility seed = config['seed'] - #random.seed(seed) - #np.random.seed(seed) - #torch.manual_seed(seed) + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) num_envs = 4096 self.num_envs = num_envs @@ -178,37 +178,14 @@ def __init__(self, config, logger=None, verbose=True): f'minibatch_size {self.minibatch_size} must be divisible by bptt_horizon {horizon}' ) - # Torch compile - #self.uncompiled_policy = policy - #self.policy = policy - #self.compiled_policy = torch.compile(policy) - #if config['compile']: - # 
self.policy = torch.compile(policy, mode=config['compile_mode']) - # #self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) - # pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) - # Optimizer ''' - if config['optimizer'] == 'adam': - optimizer = torch.optim.Adam( - self.policy.parameters(), - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - ) - elif config['optimizer'] == 'muon': - from heavyball import ForeachMuon - warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') - import heavyball.utils - heavyball.utils.compile_mode = config['compile_mode'] if config['compile'] else None - optimizer = ForeachMuon( - self.policy.parameters(), - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - ) - else: - raise ValueError(f'Unknown optimizer: {config["optimizer"]}') + optimizer = torch.optim.Adam( + self.policy.parameters(), + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + ) ''' # Logging self.logger = logger @@ -230,13 +207,6 @@ def __init__(self, config, logger=None, verbose=True): config['adam_eps'], epochs, ) - # Automatic mixed precision - precision = config['precision'] - self.amp_context = contextlib.nullcontext() - #if config.get('amp', True) and config['device'] == 'cuda': - # self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision)) - #if precision not in ('float32', 'bfloat16'): - # raise pufferlib.APIUsageError(f'Invalid precision: {precision}: use float32 or bfloat16') # Initializations self.config = config @@ -268,36 +238,6 @@ def sps(self): return (self.global_step - self.last_log_step) / (time.time() - self.last_log_time) - def _compiled_evaluate(self, lstm_h, lstm_c): - config = self.config - device = config['device'] - o = 
self.vecenv.observations - r = self.vecenv.rewards - d = self.vecenv.terminals - t = torch.zeros(o.shape[0], dtype=torch.bool, device=device) - for i in range(config['bptt_horizon']): - done_mask = d - self.global_step += self.num_envs - - with torch.no_grad(), self.amp_context: - logits, value, lstm_h, lstm_c = self.policy(o, lstm_h, lstm_c) - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - r = torch.clamp(r, -1, 1) - - with torch.no_grad(): - self.observations[:, i] = o - self.actions[:, i] = action - self.logprobs[:, i] = logprob - self.rewards[:, i] = r - self.terminals[:, i] = d.float() - self.values[:, i] = value.flatten() - - if isinstance(logits, torch.distributions.Normal): - action = torch.clamp(action, self.vecenv.action_space.low, self.vecenv.action_space.high) - - self.vecenv.step(action) - - def evaluate(self): profile = self.profile epoch = self.epoch @@ -330,7 +270,7 @@ def evaluate(self): self.terminals, self.values, self.config['bptt_horizon'], - self.num_envs # Or wherever num_envs is stored + self.num_envs ) logs = self.vecenv.log() @@ -351,6 +291,8 @@ def train(self): config = self.config device = config['device'] + self.ratio[:] = 1 + losses = _C.compiled_train( self.pufferl_cpp, self.observations, @@ -712,166 +654,6 @@ def clear(self): prof['buffer'] = prof['delta'] prof['delta'] = 0 -@torch.compile(fullgraph=True, mode='reduce-overhead') -def lstm_loop(policy, params_and_buffers, mb_obs, h, c, logits, newvalue, horizon): - for t in range(horizon): - l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], h, c)) - logits[:, t] = l - newvalue[:, t] = n - - return logits, newvalue - -def compute_loss(params_and_buffers, policy, lstm, mb_obs, - mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, - mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, - config, clip_coef, vf_clip, num_minibatches, mb, horizon, num_atns, segments): - segments = int(segments) - num_atns = 
int(num_atns) - logits = torch.empty(segments, horizon, num_atns, device=config['device']) - newvalue = torch.empty(segments, horizon, 1, device=config['device']) - #logits = [] - #newvalue = [] - #h = None - #c = None - - ''' - h = torch.zeros(segments, 128, device=config['device']) - c = torch.zeros(segments, 128, device=config['device']) - logits, values = lstm_loop(policy, params_and_buffers, mb_obs, h, c, logits, newvalue, horizon) - ''' - - state = { - 'lstm_h': None, - 'lstm_c': None - } - logits, values = forward_train(policy, lstm, mb_obs, state) - - - #for t in range(horizon): - # l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], h, c)) - # #l, n, h, c = func.functional_call(policy, params_and_buffers, (mb_obs[:, t], None, None)) - # logits[:, t] = l - # newvalue[:, t] = n - # #logits.append(l) - # #newvalue.append(n) - - logits = logits.reshape(segments*horizon, num_atns) - - #logits, newvalue, h, c = policy(mb_obs, h, c) - #logits, newvalue = func.functional_call(policy, params_and_buffers, mb_obs) - - #logits = torch.cat(logits, dim=0) - #newvalue = torch.stack(newvalue, dim=1) - - actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) - - #profile('train_misc', epoch) - newlogprob = newlogprob.reshape(mb_logprobs.shape) - logratio = newlogprob - mb_logprobs - ratio = logratio.exp() - #self.ratio[idx] = ratio.detach() - - with torch.no_grad(): - old_approx_kl = (-logratio).mean() - approx_kl = ((ratio - 1) - logratio).mean() - clipfrac = ((ratio - 1.0).abs() > config['clip_coef']).float().mean() - - adv_new = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, - ratio, adv, config['gamma'], config['gae_lambda'], - config['vtrace_rho_clip'], config['vtrace_c_clip']) - adv_new = mb_prio * (adv_new - adv_new.mean()) / (adv_new.std() + 1e-8) - - # Losses - pg_loss1 = -adv_new * ratio - pg_loss2 = -adv_new * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) - pg_loss = torch.max(pg_loss1, 
pg_loss2).mean() - - newvalue = newvalue.view(mb_returns.shape) - v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) - v_loss_unclipped = (newvalue - mb_returns) ** 2 - v_loss_clipped = (v_clipped - mb_returns) ** 2 - v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() - - entropy_loss = entropy.mean() - - loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss - return loss - -#@torch.compile(fullgraph=True) -def _compiled_train(policy, lstm, optimizer, observations, actions, logprobs, rewards, - terminals, truncations, ratio, values, epoch, total_epochs, - minibatch_segments, segments, obs_shape, atn_shape, config): - - config['device'] - horizon = config['bptt_horizon'] - device = config['device'] - b0 = config['prio_beta0'] - a = config['prio_alpha'] - clip_coef = config['clip_coef'] - vf_clip = config['vf_clip_coef'] - anneal_beta = b0 + (1 - b0)*a*epoch/total_epochs - - ratio = torch.ones_like(values) - - num_minibatches = config['num_minibatches'] - - lr = optimizer.param_groups[0]['lr'] - buffers = {}#dict(policy.named_buffers()) - param_names = [k for k, v in policy.named_parameters() if v.requires_grad] - params = [v for k, v in policy.named_parameters() if v.requires_grad] - params_dict = dict(zip(param_names, params)) - params_and_buffers = {**buffers, **params_dict} - - for mb in range(num_minibatches): - shape = values.shape - advantages = torch.zeros(shape, device=device) - advantages = compute_puff_advantage(values, rewards, - terminals, ratio, advantages, config['gamma'], - config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) - - adv = advantages.abs().sum(axis=1) - prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) - prio_probs = (prio_weights + 1e-6)/(prio_weights.sum() + 1e-6) - idx = torch.multinomial(prio_probs, - minibatch_segments, replacement=True) - mb_prio = (segments*prio_probs[idx, None])**-anneal_beta - mb_obs = observations[idx] - mb_actions = actions[idx] - 
mb_logprobs = logprobs[idx] - mb_rewards = rewards[idx] - mb_terminals = terminals[idx] - mb_truncations = truncations[idx] - mb_ratio = ratio[idx] - mb_values = values[idx] - mb_returns = advantages[idx] + mb_values - mb_advantages = advantages[idx] - - if not config['use_rnn']: - mb_obs = mb_obs.reshape(-1, *obs_shape) - - - adv = advantages[idx] - - optimizer.zero_grad() - loss = compute_loss(params_and_buffers, policy, lstm, mb_obs, - mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, - mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, - config, clip_coef, vf_clip, num_minibatches, mb, horizon, atn_shape, minibatch_segments) - loss.backward() - optimizer.step() - - ''' - grad_fn = func.grad(compute_loss, has_aux=False) - grads = grad_fn(params_and_buffers, policy, lstm, mb_obs, - mb_actions, mb_logprobs, mb_rewards, mb_terminals, mb_truncations, - mb_ratio, mb_values, mb_returns, mb_advantages, mb_prio, adv, epoch, idx, - config, clip_coef, vf_clip, num_minibatches, mb, horizon, atn_shape, minibatch_segments) - - for name, param in zip(param_names, params): - if name in grads: - param.data.sub_(lr * grads[name]) - ''' - class Utilization(Thread): def __init__(self, delay=1, maxlen=20): super().__init__() @@ -1019,7 +801,44 @@ def download(self): data_dir = artifact.download() model_file = max(os.listdir(data_dir)) return f'{data_dir}/{model_file}' - + +def check(env_name): + args = load_config(env_name) + vecenv = load_env(env_name, args) + policy = load_policy(args, vecenv, env_name) + train_config = dict(**args['train'], env=env_name) + import pufferlib.python_pufferl + pufferl_python = pufferlib.python_pufferl.PuffeRL(train_config, vecenv, policy) + + # TODO: remember to set seet again before this + #pufferl_python.evaluate() + #pufferl_python.train() + + pufferl_cpp = PuffeRL(train_config) + python_params = dict(policy.named_parameters()) + for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): + v_python = 
python_params[f'policy.{k}'].data + assert torch.allclose(v, v_python), k + + breakpoint() + + pufferl_cpp.evaluate() + pufferl_cpp.train() + + for i in range(pufferl_python.observations.shape[1]): + assert torch.allclose(pufferl_python.observations[:, i].float(), pufferl_cpp.observations[:, i]), f'Observation {i} mismatch' + + assert torch.allclose(pufferl_python.observations.float(), pufferl_cpp.observations) + assert torch.allclose(pufferl_python.actions, pufferl_cpp.actions) + assert torch.allclose(pufferl_python.rewards, pufferl_cpp.rewards) + assert torch.allclose(pufferl_python.terminals, pufferl_cpp.terminals) + assert torch.allclose(pufferl_python.values, pufferl_cpp.values) + + for k, v in policy.named_parameters(): + assert torch.allclose(v, pufferl_cpp.policy.named_parameters()[k].data) + + print('Check passed') + def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=True): args = args or load_config(env_name) @@ -1553,6 +1372,8 @@ def main(): profile(env_name=env_name) elif mode == 'export': export(env_name=env_name) + elif mode == 'check': + check(env_name=env_name) else: raise pufferlib.APIUsageError(err) diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py new file mode 100644 index 000000000..d251a1fa5 --- /dev/null +++ b/pufferlib/python_pufferl.py @@ -0,0 +1,1429 @@ +## puffer [train | eval | sweep] [env_name] [optional args] -- See https://puffer.ai for full detail0 +# This is the same as python -m pufferlib.pufferl [train | eval | sweep] [env_name] [optional args] +# Distributed example: torchrun --standalone --nnodes=1 --nproc-per-node=6 -m pufferlib.pufferl train puffer_nmmo3 + +import contextlib +import warnings +warnings.filterwarnings('error', category=RuntimeWarning) + +import os +import sys +import glob +import ast +import time +import random +import shutil +import argparse +import importlib +import configparser +from threading import Thread +from collections import defaultdict, deque + +import 
numpy as np +import psutil + +import torch +import torch.distributed +from torch.distributed.elastic.multiprocessing.errors import record +import torch.utils.cpp_extension + +import pufferlib +import pufferlib.sweep +import pufferlib.vector +import pufferlib.pytorch +try: + from pufferlib import _C +except ImportError: + raise ImportError('Failed to import C/CUDA advantage kernel. If you have non-default PyTorch, try installing with --no-build-isolation') + +import rich +import rich.traceback +from rich.table import Table +from rich.console import Console +from rich_argparse import RichHelpFormatter +rich.traceback.install(show_locals=False) + +import signal # Aggressively exit on ctrl+c +signal.signal(signal.SIGINT, lambda sig, frame: os._exit(0)) + +# Assume advantage kernel has been built if CUDA compiler is available +ADVANTAGE_CUDA = shutil.which("nvcc") is not None + +class PuffeRL: + def __init__(self, config, vecenv, policy, logger=None, verbose=True): + # Backend perf optimization + torch.set_float32_matmul_precision('high') + torch.backends.cudnn.deterministic = config['torch_deterministic'] + torch.backends.cudnn.benchmark = True + + # Reproducibility + seed = config['seed'] + #random.seed(seed) + #np.random.seed(seed) + #torch.manual_seed(seed) + + # Vecenv info + vecenv.async_reset(seed) + obs_space = vecenv.single_observation_space + atn_space = vecenv.single_action_space + total_agents = vecenv.num_agents + self.total_agents = total_agents + + # Experience + if config['batch_size'] == 'auto' and config['bptt_horizon'] == 'auto': + raise pufferlib.APIUsageError('Must specify batch_size or bptt_horizon') + elif config['batch_size'] == 'auto': + config['batch_size'] = total_agents * config['bptt_horizon'] + elif config['bptt_horizon'] == 'auto': + config['bptt_horizon'] = config['batch_size'] // total_agents + + batch_size = config['batch_size'] + horizon = config['bptt_horizon'] + segments = batch_size // horizon + self.segments = segments + if 
total_agents > segments: + raise pufferlib.APIUsageError( + f'Total agents {total_agents} <= segments {segments}' + ) + + device = config['device'] + self.observations = torch.zeros(segments, horizon, *obs_space.shape, + dtype=pufferlib.pytorch.numpy_to_torch_dtype_dict[obs_space.dtype], + pin_memory=device == 'cuda' and config['cpu_offload'], + device='cpu' if config['cpu_offload'] else device) + self.actions = torch.zeros(segments, horizon, *atn_space.shape, device=device, + dtype=pufferlib.pytorch.numpy_to_torch_dtype_dict[atn_space.dtype]) + self.values = torch.zeros(segments, horizon, device=device) + self.logprobs = torch.zeros(segments, horizon, device=device) + self.rewards = torch.zeros(segments, horizon, device=device) + self.terminals = torch.zeros(segments, horizon, device=device) + self.truncations = torch.zeros(segments, horizon, device=device) + self.ratio = torch.ones(segments, horizon, device=device) + self.importance = torch.ones(segments, horizon, device=device) + self.ep_lengths = torch.zeros(total_agents, device=device, dtype=torch.int32) + self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) + self.free_idx = total_agents + + # LSTM + if config['use_rnn']: + n = vecenv.agents_per_batch + h = policy.hidden_size + self.lstm_h = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} + self.lstm_c = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} + + # Minibatching & gradient accumulation + minibatch_size = config['minibatch_size'] + max_minibatch_size = config['max_minibatch_size'] + self.minibatch_size = min(minibatch_size, max_minibatch_size) + self.accumulate_minibatches = max(1, minibatch_size // max_minibatch_size) + self.minibatch_segments = self.minibatch_size // horizon + if self.minibatch_segments * horizon != self.minibatch_size: + raise pufferlib.APIUsageError( + f'minibatch_size {self.minibatch_size} must be divisible by bptt_horizon {horizon}' + ) + + # Torch compile 
+ self.uncompiled_policy = policy + self.policy = policy + if config['compile']: + self.policy = torch.compile(policy, mode=config['compile_mode']) + self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) + pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) + + # Optimizer + if config['optimizer'] == 'adam': + optimizer = torch.optim.Adam( + self.policy.parameters(), + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + ) + elif config['optimizer'] == 'muon': + from heavyball import ForeachMuon + warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') + import heavyball.utils + heavyball.utils.compile_mode = config['compile_mode'] if config['compile'] else None + optimizer = ForeachMuon( + self.policy.parameters(), + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + ) + else: + raise ValueError(f'Unknown optimizer: {config["optimizer"]}') + + self.optimizer = optimizer + + # Logging + self.logger = logger + if logger is None: + self.logger = Logger(config) + + # Learning rate scheduler + epochs = max(1, config['total_timesteps'] // config['batch_size']) + self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) + self.total_epochs = epochs + + # Automatic mixed precision + precision = config['precision'] + self.amp_context = contextlib.nullcontext() + if config.get('amp', True) and config['device'] == 'cuda': + self.amp_context = torch.amp.autocast(device_type='cuda', dtype=getattr(torch, precision)) + if precision not in ('float32', 'bfloat16'): + raise pufferlib.APIUsageError(f'Invalid precision: {precision}: use float32 or bfloat16') + + # Initializations + self.config = config + self.vecenv = vecenv + self.epoch = 0 + self.global_step = 0 + self.last_log_step = 0 + self.last_log_time = time.time() + 
def evaluate(self):
    '''Collect experience from the vectorized env into the rollout buffers.

    Steps ``self.vecenv`` until ``self.segments`` buffer rows have been
    filled, writing observations, actions, log-probs, rewards, terminals and
    values at position ``l`` of each row. Per-step ``info`` entries are
    accumulated into ``self.stats``.

    Returns:
        ``self.stats``, the mapping of stat name to the list of raw values
        collected so far (not yet averaged).
    '''
    profile = self.profile
    epoch = self.epoch
    profile('eval', epoch)
    profile('eval_misc', epoch, nest=True)

    config = self.config
    device = config['device']

    # Recurrent state is reset to zeros at the start of every collection pass
    if config['use_rnn']:
        for k in self.lstm_h:
            self.lstm_h[k] = torch.zeros(self.lstm_h[k].shape, device=device)
            self.lstm_c[k] = torch.zeros(self.lstm_c[k].shape, device=device)

    self.full_rows = 0
    while self.full_rows < self.segments:
        profile('env', epoch)
        o, r, d, t, info, env_id, mask = self.vecenv.recv()

        profile('eval_misc', epoch)
        # assumes env_id is a contiguous, sorted id range -- TODO confirm
        env_id = slice(env_id[0], env_id[-1] + 1)

        done_mask = d + t # TODO: Handle truncations separately
        self.global_step += int(mask.sum())

        profile('eval_copy', epoch)
        o = torch.as_tensor(o)
        o_device = o.to(device)#, non_blocking=True)
        r = torch.as_tensor(r).to(device)#, non_blocking=True)
        d = torch.as_tensor(d).to(device)#, non_blocking=True)

        profile('eval_forward', epoch)
        with torch.no_grad(), self.amp_context:
            state = dict(
                reward=r,
                done=d,
                env_id=env_id,
                mask=mask,
            )

            if config['use_rnn']:
                state['lstm_h'] = self.lstm_h[env_id.start]
                state['lstm_c'] = self.lstm_c[env_id.start]

            logits, value = self.policy.forward_eval(o_device, state)
            action, logprob, _ = pufferlib.pytorch.sample_logits(logits)
            # Rewards are clipped to [-1, 1] before being stored
            r = torch.clamp(r, -1, 1)

        profile('eval_copy', epoch)
        with torch.no_grad():
            if config['use_rnn']:
                self.lstm_h[env_id.start] = state['lstm_h']
                self.lstm_c[env_id.start] = state['lstm_c']

            # Fast path for fully vectorized envs
            l = self.ep_lengths[env_id.start].item()
            batch_rows = slice(self.ep_indices[env_id.start].item(), 1+self.ep_indices[env_id.stop - 1].item())

            if config['cpu_offload']:
                self.observations[batch_rows, l] = o
            else:
                self.observations[batch_rows, l] = o_device

            self.actions[batch_rows, l] = action
            self.logprobs[batch_rows, l] = logprob
            self.rewards[batch_rows, l] = r
            self.terminals[batch_rows, l] = d.float()
            self.values[batch_rows, l] = value.flatten()

            # Note: We are not yet handling masks in this version
            self.ep_lengths[env_id] += 1
            if l+1 >= config['bptt_horizon']:
                # Row is full: hand these envs fresh rows from the free pool
                num_full = env_id.stop - env_id.start
                self.ep_indices[env_id] = self.free_idx + torch.arange(num_full, device=config['device']).int()
                self.ep_lengths[env_id] = 0
                self.free_idx += num_full
                self.full_rows += num_full

            action = action.cpu().numpy()
            if isinstance(logits, torch.distributions.Normal):
                action = np.clip(action, self.vecenv.action_space.low, self.vecenv.action_space.high)

        profile('eval_misc', epoch)
        for i in info:
            for k, v in pufferlib.unroll_nested_dict(i):
                # Convert arrays first, then dispatch on the converted value.
                # Previously the array branch was part of the same if/elif
                # chain, so array-valued stats were converted and then dropped.
                if isinstance(v, np.ndarray):
                    v = v.tolist()

                if isinstance(v, (list, tuple)):
                    self.stats[k].extend(v)
                else:
                    self.stats[k].append(v)

        profile('env', epoch)
        self.vecenv.send(action)

    profile('eval_misc', epoch)
    # Reset row bookkeeping so the next pass starts from a clean buffer
    self.free_idx = self.total_agents
    self.ep_indices = torch.arange(self.total_agents, device=device, dtype=torch.int32)
    self.ep_lengths.zero_()
    profile.end()
    return self.stats
b0)*a*self.epoch/self.total_epochs + self.ratio[:] = 1 + + num_minibatches = config['num_minibatches'] + for mb in range(num_minibatches): + profile('train_misc', epoch, nest=True) + self.amp_context.__enter__() + + shape = self.values.shape + advantages = torch.zeros(shape, device=device) + advantages = compute_puff_advantage(self.values, self.rewards, + self.terminals, self.ratio, advantages, config['gamma'], + config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) + + profile('train_copy', epoch) + adv = advantages.abs().sum(axis=1) + prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) + prio_probs = (prio_weights + 1e-6)/(prio_weights.sum() + 1e-6) + idx = torch.multinomial(prio_probs, + self.minibatch_segments, replacement=True) + mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta + mb_obs = self.observations[idx] + mb_actions = self.actions[idx] + mb_logprobs = self.logprobs[idx] + mb_rewards = self.rewards[idx] + mb_terminals = self.terminals[idx] + mb_truncations = self.truncations[idx] + mb_ratio = self.ratio[idx] + mb_values = self.values[idx] + mb_returns = advantages[idx] + mb_values + mb_advantages = advantages[idx] + + profile('train_forward', epoch) + if not config['use_rnn']: + mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) + + state = dict( + action=mb_actions, + lstm_h=None, + lstm_c=None, + ) + + logits, newvalue = self.policy(mb_obs, state) + actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) + + profile('train_misc', epoch) + newlogprob = newlogprob.reshape(mb_logprobs.shape) + logratio = newlogprob - mb_logprobs + ratio = logratio.exp() + self.ratio[idx] = ratio.detach() + + with torch.no_grad(): + old_approx_kl = (-logratio).mean() + approx_kl = ((ratio - 1) - logratio).mean() + clipfrac = ((ratio - 1.0).abs() > config['clip_coef']).float().mean() + + adv = advantages[idx] + adv = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, + ratio, 
adv, config['gamma'], config['gae_lambda'], + config['vtrace_rho_clip'], config['vtrace_c_clip']) + adv = mb_advantages + adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) + + # Losses + pg_loss1 = -adv * ratio + pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) + pg_loss = torch.max(pg_loss1, pg_loss2).mean() + + newvalue = newvalue.view(mb_returns.shape) + v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) + v_loss_unclipped = (newvalue - mb_returns) ** 2 + v_loss_clipped = (v_clipped - mb_returns) ** 2 + v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() + + entropy_loss = entropy.mean() + + loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss + self.amp_context.__enter__() # TODO: AMP needs some debugging + + # This breaks vloss clipping? + self.values[idx] = newvalue.detach().float() + + # Logging + profile('train_misc', epoch) + losses['policy_loss'] += pg_loss.item() / num_minibatches + losses['value_loss'] += v_loss.item() / num_minibatches + losses['entropy'] += entropy_loss.item() / num_minibatches + losses['old_approx_kl'] += old_approx_kl.item() / num_minibatches + losses['approx_kl'] += approx_kl.item() / num_minibatches + losses['clipfrac'] += clipfrac.item() / num_minibatches + losses['importance'] += ratio.mean().item() / num_minibatches + + # Learn on accumulated minibatches + profile('learn', epoch) + loss.backward() + if (mb + 1) % self.accumulate_minibatches == 0: + torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) + self.optimizer.step() + self.optimizer.zero_grad() + + # Reprioritize experience + profile('train_misc', epoch) + if config['anneal_lr']: + self.scheduler.step() + + y_pred = self.values.flatten() + y_true = advantages.flatten() + self.values.flatten() + var_y = y_true.var() + explained_var = torch.nan if var_y == 0 else (1 - (y_true - y_pred).var() / var_y).item() + losses['explained_variance'] = explained_var + + 
def mean_and_log(self):
    '''Average the collected stats, build the log record and send it to the logger.

    Every entry of ``self.stats`` is replaced by its ``np.mean``. Entries that
    cannot be averaged (e.g. non-numeric values) are removed from
    ``self.stats`` and therefore do not appear in the log record.

    Returns:
        The dict of logged values, or ``None`` on the distributed branch that
        skips logging (see the note below).
    '''
    config = self.config
    for k in list(self.stats.keys()):
        try:
            self.stats[k] = np.mean(self.stats[k])
        except Exception:
            # Drop stats that cannot be averaged. The previous code deleted
            # the key and then unconditionally re-inserted the raw value,
            # so the delete had no effect.
            del self.stats[k]

    device = config['device']
    agent_steps = int(dist_sum(self.global_step, device))
    logs = {
        'SPS': dist_sum(self.sps, device),
        'agent_steps': agent_steps,
        'uptime': time.time() - self.start_time,
        'epoch': int(dist_sum(self.epoch, device)),
        'learning_rate': self.optimizer.param_groups[0]["lr"],
        **{f'environment/{k}': v for k, v in self.stats.items()},
        **{f'losses/{k}': v for k, v in self.losses.items()},
        **{f'performance/{k}': v['elapsed'] for k, v in self.profile},
        #**{f'environment/{k}': dist_mean(v, device) for k, v in self.stats.items()},
        #**{f'losses/{k}': dist_mean(v, device) for k, v in self.losses.items()},
        #**{f'performance/{k}': dist_sum(v['elapsed'], device) for k, v in self.profile},
    }

    if torch.distributed.is_initialized():
        # NOTE(review): as written, every rank EXCEPT 0 logs and rank 0
        # returns None. This looks inverted -- confirm the intended rank
        # before changing it; behavior is preserved here.
        if torch.distributed.get_rank() != 0:
            self.logger.log(logs, agent_steps)
            return logs
        else:
            return None

    self.logger.log(logs, agent_steps)
    return logs
if torch.distributed.is_initialized(): + if torch.distributed.get_rank() != 0: + return + + run_id = self.logger.run_id + path = os.path.join(self.config['data_dir'], + self.config["env"], run_id) + if not os.path.exists(path): + os.makedirs(path) + + model_name = f'model_{self.config["env"]}_{self.epoch:06d}.pt' + model_path = os.path.join(path, model_name) + if os.path.exists(model_path): + return model_path + + torch.save(self.uncompiled_policy.state_dict(), model_path) + + state = { + 'optimizer_state_dict': self.optimizer.state_dict(), + 'global_step': self.global_step, + 'agent_step': self.global_step, + 'update': self.epoch, + 'model_name': model_name, + 'run_id': run_id, + } + state_path = os.path.join(path, 'trainer_state.pt') + torch.save(state, state_path + '.tmp') + os.rename(state_path + '.tmp', state_path) + return model_path + + def print_dashboard(self, clear=False, idx=[0], + c1='[cyan]', c2='[white]', b1='[bright_cyan]', b2='[bright_white]'): + if not self.verbose: + return + + config = self.config + sps = dist_sum(self.sps, config['device']) + agent_steps = dist_sum(self.global_step, config['device']) + if torch.distributed.is_initialized(): + if torch.distributed.get_rank() != 0: + return + + profile = self.profile + console = Console() + dashboard = Table(box=rich.box.ROUNDED, expand=True, + show_header=False, border_style='bright_cyan') + table = Table(box=None, expand=True, show_header=False) + dashboard.add_row(table) + + table.add_column(justify="left", width=30) + table.add_column(justify="center", width=12) + table.add_column(justify="center", width=12) + table.add_column(justify="center", width=13) + table.add_column(justify="right", width=13) + + table.add_row( + f'{b1}PufferLib {b2}3.0 {idx[0]*" "}:blowfish:', + f'{c1}CPU: {b2}{np.mean(self.utilization.cpu_util):.1f}{c2}%', + f'{c1}GPU: {b2}{np.mean(self.utilization.gpu_util):.1f}{c2}%', + f'{c1}DRAM: {b2}{np.mean(self.utilization.cpu_mem):.1f}{c2}%', + f'{c1}VRAM: 
{b2}{np.mean(self.utilization.gpu_mem):.1f}{c2}%', + ) + idx[0] = (idx[0] - 1) % 10 + + s = Table(box=None, expand=True) + remaining = 'A hair past a freckle' + if sps != 0: + remaining = duration((config['total_timesteps'] - agent_steps)/sps, b2, c2) + + s.add_column(f"{c1}Summary", justify='left', vertical='top', width=10) + s.add_column(f"{c1}Value", justify='right', vertical='top', width=14) + s.add_row(f'{c2}Env', f'{b2}{config["env"]}') + s.add_row(f'{c2}Params', abbreviate(self.model_size, b2, c2)) + s.add_row(f'{c2}Steps', abbreviate(agent_steps, b2, c2)) + s.add_row(f'{c2}SPS', abbreviate(sps, b2, c2)) + s.add_row(f'{c2}Epoch', f'{b2}{self.epoch}') + s.add_row(f'{c2}Uptime', duration(self.uptime, b2, c2)) + s.add_row(f'{c2}Remaining', remaining) + + delta = profile.eval['buffer'] + profile.train['buffer'] + p = Table(box=None, expand=True, show_header=False) + p.add_column(f"{c1}Performance", justify="left", width=10) + p.add_column(f"{c1}Time", justify="right", width=8) + p.add_column(f"{c1}%", justify="right", width=4) + p.add_row(*fmt_perf('Evaluate', b1, delta, profile.eval, b2, c2)) + p.add_row(*fmt_perf(' Forward', c2, delta, profile.eval_forward, b2, c2)) + p.add_row(*fmt_perf(' Env', c2, delta, profile.env, b2, c2)) + p.add_row(*fmt_perf(' Copy', c2, delta, profile.eval_copy, b2, c2)) + p.add_row(*fmt_perf(' Misc', c2, delta, profile.eval_misc, b2, c2)) + p.add_row(*fmt_perf('Train', b1, delta, profile.train, b2, c2)) + p.add_row(*fmt_perf(' Forward', c2, delta, profile.train_forward, b2, c2)) + p.add_row(*fmt_perf(' Learn', c2, delta, profile.learn, b2, c2)) + p.add_row(*fmt_perf(' Copy', c2, delta, profile.train_copy, b2, c2)) + p.add_row(*fmt_perf(' Misc', c2, delta, profile.train_misc, b2, c2)) + + l = Table(box=None, expand=True, ) + l.add_column(f'{c1}Losses', justify="left", width=16) + l.add_column(f'{c1}Value', justify="right", width=8) + for metric, value in self.losses.items(): + l.add_row(f'{c2}{metric}', f'{b2}{value:.3f}') + + monitor 
= Table(box=None, expand=True, pad_edge=False) + monitor.add_row(s, p, l) + dashboard.add_row(monitor) + + table = Table(box=None, expand=True, pad_edge=False) + dashboard.add_row(table) + left = Table(box=None, expand=True) + right = Table(box=None, expand=True) + table.add_row(left, right) + left.add_column(f"{c1}User Stats", justify="left", width=20) + left.add_column(f"{c1}Value", justify="right", width=10) + right.add_column(f"{c1}User Stats", justify="left", width=20) + right.add_column(f"{c1}Value", justify="right", width=10) + i = 0 + + if self.stats: + self.last_stats = self.stats + + for metric, value in (self.stats or self.last_stats).items(): + try: # Discard non-numeric values + int(value) + except: + continue + + u = left if i % 2 == 0 else right + u.add_row(f'{c2}{metric}', f'{b2}{value:.3f}') + i += 1 + if i == 30: + break + + if clear: + console.clear() + + with console.capture() as capture: + console.print(dashboard) + + print('\033[0;0H' + capture.get()) + +def compute_puff_advantage(values, rewards, terminals, + ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip): + '''CUDA kernel for puffer advantage with automatic CPU fallback. 
def abbreviate(num, b2, c2):
    '''Render a number with a K/M/B/T magnitude suffix.

    Values below 1e3 are returned via ``str``. ``b2`` and ``c2`` are accepted
    for signature parity with the other dashboard formatters and are unused.
    '''
    if num < 1e3:
        return str(num)

    # (exclusive upper bound, divisor, suffix)
    for limit, scale, suffix in ((1e6, 1e3, 'K'), (1e9, 1e6, 'M'), (1e12, 1e9, 'B')):
        if num < limit:
            return f'{num/scale:.1f}{suffix}'

    return f'{num/1e12:.2f}T'

def duration(seconds, b2, c2):
    '''Format a span of seconds as h/m/s, wrapping digits in ``b2`` and units in ``c2``.

    Negative inputs render as zero seconds. Leading zero-valued units are
    omitted (no hours field under one hour, no minutes field under one minute).
    '''
    if seconds < 0:
        return f"{b2}0{c2}s"

    total = int(seconds)
    minutes, s = divmod(total, 60)
    h, m = divmod(minutes, 60)
    if h:
        return f"{b2}{h}{c2}h {b2}{m}{c2}m {b2}{s}{c2}s"
    if m:
        return f"{b2}{m}{c2}m {b2}{s}{c2}s"
    return f"{b2}{s}{c2}s"

def fmt_perf(name, color, delta_ref, prof, b2, c2):
    '''Build one dashboard row: (colored label, elapsed time, percent of ``delta_ref``).

    ``prof`` is read for its ``'buffer'`` and ``'elapsed'`` entries. The
    percentage is 0 when ``delta_ref`` is 0.
    '''
    if delta_ref == 0:
        percent = 0
    else:
        # The small epsilon keeps an exact 100% share from printing as 3 digits
        percent = int(100*prof['buffer']/delta_ref - 1e-5)

    label = f'{color}{name}'
    elapsed = duration(prof['elapsed'], b2, c2)
    share = f'{b2}{percent:2d}{c2}%'
    return label, elapsed, share

def dist_sum(value, device):
    '''Sum ``value`` across ranks; returns it unchanged when torch.distributed is not initialized.'''
    if torch.distributed.is_initialized():
        tensor = torch.tensor(value, device=device)
        torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM)
        return tensor.item()

    return value

def dist_mean(value, device):
    '''Average ``value`` across ranks; returns it unchanged when torch.distributed is not initialized.'''
    if torch.distributed.is_initialized():
        return dist_sum(value, device) / torch.distributed.get_world_size()

    return value
def downsample(arr, m):
    '''Reduce ``arr`` to ``m`` bin means followed by its final element.

    Inputs with at most ``m`` elements are returned unchanged. With ``m == 0``
    only the last element is kept. Otherwise everything except the last
    element is trimmed from the front to a multiple of ``m``, split into ``m``
    equal bins and averaged, and the untouched last element is appended.
    '''
    if len(arr) <= m:
        return arr

    if m == 0:
        return [arr[-1]]

    last = arr[-1]
    head = np.array(arr[:-1])
    # Keep the most recent points when the length is not divisible by m
    usable = (len(head)//m)*m
    binned = head[-usable:].reshape(m, -1).mean(axis=1)
    return np.concatenate([binned, [last]])
class WandbLogger:
    '''Logger backend that records runs to Weights & Biases.

    Exposes the same methods as the other logger backends in this file
    (``init``, ``log``, ``log_cost``, ``close``) plus ``download``.
    '''
    def __init__(self, args, load_id=None, resume='allow'):
        # Imported lazily so wandb is only required when this backend is used
        import wandb
        wandb.init(
            id=load_id or wandb.util.generate_id(),
            project=args['wandb_project'],
            group=args['wandb_group'],
            allow_val_change=True,
            save_code=False,
            resume=resume,
            config=args,
            tags = [args['tag']] if args['tag'] is not None else [],
        )
        self.wandb = wandb
        self.run_id = wandb.run.id

    def init(self, args):
        '''No-op: the config is already passed to ``wandb.init`` in ``__init__``.

        Present because ``train()`` calls ``logger.init(args)`` on whichever
        logger is active; without it a wandb run raised AttributeError.
        '''
        pass

    def log_cost(self, cost):
        '''No-op, present because ``train()`` calls ``logger.log_cost(...)`` on every logger.'''
        pass

    def log(self, logs, step):
        '''Forward a dict of metrics to wandb at the given step.'''
        self.wandb.log(logs, step=step)

    def close(self, model_path):
        '''Upload ``model_path`` as a model artifact named after the run, then finish the run.'''
        artifact = self.wandb.Artifact(self.run_id, type='model')
        artifact.add_file(model_path)
        self.wandb.run.log_artifact(artifact)
        self.wandb.finish()

    def download(self):
        '''Download the run's latest artifact and return the path of one file in it.

        The file chosen is the lexicographically greatest name in the
        downloaded directory.
        '''
        artifact = self.wandb.use_artifact(f'{self.run_id}:latest')
        data_dir = artifact.download()
        model_file = max(os.listdir(data_dir))
        return f'{data_dir}/{model_file}'
+ pufferl.logger.init(args) + + all_logs = [] + while pufferl.global_step < train_config['total_timesteps']: + if pufferl.uptime > args['sweep']['max_cost']: + break + + if train_config['device'] == 'cuda': + torch.compiler.cudagraph_mark_step_begin() + pufferl.evaluate() + if train_config['device'] == 'cuda': + torch.compiler.cudagraph_mark_step_begin() + logs = pufferl.train() + + if logs is not None: + if pufferl.global_step > 0.20*train_config['total_timesteps']: + all_logs.append(logs) + + # Final eval. You can reset the env here, but depending on + # your env, this can skew data (i.e. you only collect the shortest + # rollouts within a fixed number of epochs) + i = 0 + stats = {} + uptime = pufferl.uptime + agent_steps = pufferl.global_step + while i < 128 or not stats: + stats = pufferl.evaluate() + i += 1 + + logs = pufferl.mean_and_log() + logs['uptime'] = uptime + logs['agent_steps'] = agent_steps + if logs is not None: + all_logs.append(logs) + + pufferl.print_dashboard() + model_path = pufferl.close() + pufferl.logger.log_cost(uptime) + pufferl.logger.close(model_path) + return all_logs + +def eval(env_name, args=None, vecenv=None, policy=None): + args = args or load_config(env_name) + backend = args['vec']['backend'] + if backend != 'PufferEnv': + backend = 'Serial' + + args['vec'] = dict(backend=backend, num_envs=1) + vecenv = vecenv or load_env(env_name, args) + + policy = policy or load_policy(args, vecenv, env_name) + ob, info = vecenv.reset() + driver = vecenv.driver_env + num_agents = vecenv.observation_space.shape[0] + device = args['train']['device'] + + state = {} + if args['train']['use_rnn']: + state = dict( + lstm_h=torch.zeros(num_agents, policy.hidden_size, device=device), + lstm_c=torch.zeros(num_agents, policy.hidden_size, device=device), + ) + + frames = [] + while True: + render = driver.render() + if len(frames) < args['save_frames']: + frames.append(render) + + # Screenshot Ocean envs with F12, gifs with control + F12 + if 
driver.render_mode == 'ansi': + print('\033[0;0H' + render + '\n') + time.sleep(1/args['fps']) + elif driver.render_mode == 'rgb_array': + pass + #import cv2 + #render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR) + #cv2.imshow('frame', render) + #cv2.waitKey(1) + #time.sleep(1/args['fps']) + + with torch.no_grad(): + ob = torch.as_tensor(ob).to(device) + logits, value = policy.forward_eval(ob, state) + action, logprob, _ = pufferlib.pytorch.sample_logits(logits) + action = action.cpu().numpy().reshape(vecenv.action_space.shape) + + if isinstance(logits, torch.distributions.Normal): + action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) + + ob = vecenv.step(action)[0] + + if len(frames) > 0 and len(frames) == args['save_frames']: + import imageio + imageio.mimsave(args['gif_path'], frames, fps=args['fps'], loop=0) + frames.append('Done') + +def _sweep_worker(env_name, q_host, q_worker, device): + while True: + #print("Worker waiting") + args = q_worker.get() + #print("Worker got data") + args['train']['device'] = device + seed = time.time_ns() & 0xFFFFFFFF + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + try: + all_logs = train(env_name, args=args, verbose=False) + except Exception: + import traceback + traceback.print_exc() + + #all_logs = [{'foo': 0}] + #print("Worker ran experiment") + q_host.put(all_logs) + #print("Worker submitted result") + +def multisweep(args=None, env_name=None): + args = args or load_config(env_name) + sweep_gpus = args['sweep_gpus'] + if sweep_gpus == -1: + sweep_gpus = torch.cuda.device_count() + + method = args['sweep'].pop('method') + try: + sweep_cls = getattr(pufferlib.sweep, method) + except: + raise pufferlib.APIUsageError(f'Invalid sweep method {method}. 
See pufferlib.sweep') + + sweep = sweep_cls(args['sweep']) + points_per_run = args['sweep']['downsample'] + target_key = f'environment/{args["sweep"]["metric"]}' + + from multiprocessing import Process, Queue, set_start_method + from copy import deepcopy + + host_queues = [] + worker_queues = [] + workers = [] + worker_args = [] + set_start_method('spawn') + for i in range(sweep_gpus): + q_host = Queue() + q_worker = Queue() + w = Process( + target=_sweep_worker, + args=(env_name, q_host, q_worker, f'cuda:{i}') + ) + w.start() + host_queues.append(q_host) + worker_queues.append(q_worker) + args = deepcopy(args) + worker_args.append(args) + + for w in range(sweep_gpus): + args = worker_args[w] + sweep.suggest(args) + total_timesteps = args['train']['total_timesteps'] + worker_queues[w].put(args) + + runs = 0 + + suggestion = deepcopy(args) + while runs < args['max_runs']: + for w in range(sweep_gpus): + args = worker_args[w] + if host_queues[w].empty(): + continue + + all_logs = host_queues[w].get(timeout=0) + if not all_logs: + continue + + all_logs = [e for e in all_logs if target_key in e] + scores = downsample([log[target_key] for log in all_logs], points_per_run) + times = downsample([log['uptime'] for log in all_logs], points_per_run) + steps = downsample([log['agent_steps'] for log in all_logs], points_per_run) + #costs = np.stack([times, steps], axis=1) + costs = times + timesteps = [log['agent_steps'] for log in all_logs] + timesteps = downsample(timesteps, points_per_run) + for score, cost, timestep in zip(scores, costs, timesteps): + args['train']['total_timesteps'] = timestep + sweep.observe(args, score, cost) + + runs += 1 + + sweep.suggest(args) + worker_queues[w].put(args) + +def paretosweep(args=None, env_name=None): + args = args or load_config(env_name) + sweep_gpus = args['sweep_gpus'] + if sweep_gpus == -1: + sweep_gpus = torch.cuda.device_count() + + method = args['sweep'].pop('method') + try: + sweep_cls = getattr(pufferlib.sweep, method) + 
except: + raise pufferlib.APIUsageError(f'Invalid sweep method {method}. See pufferlib.sweep') + + total_timesteps = args['sweep']['train'].pop('total_timesteps') + mmin = total_timesteps['min'] + mmax = total_timesteps['max'] + all_timesteps = np.geomspace(mmin, mmax, sweep_gpus) + # You hardcoded buffer size to 5 instead of 10 for this + sweeps = [sweep_cls(args['sweep']) for _ in range(sweep_gpus)] + points_per_run = args['sweep']['downsample'] + target_key = f'environment/{args["sweep"]["metric"]}' + + from multiprocessing import Process, Queue, set_start_method + from copy import deepcopy + + host_queues = [] + worker_queues = [] + workers = [] + worker_args = [] + set_start_method('spawn') + for i in range(sweep_gpus): + q_host = Queue() + q_worker = Queue() + w = Process( + target=_sweep_worker, + args=(env_name, q_host, q_worker, f'cuda:{i}') + ) + w.start() + host_queues.append(q_host) + worker_queues.append(q_worker) + args = deepcopy(args) + worker_args.append(args) + + for w in range(sweep_gpus): + args = worker_args[w] + sweeps[w].suggest(args) + args['train']['total_timesteps'] = all_timesteps[w] + worker_queues[w].put(args) + + runs = 0 + + suggestion = deepcopy(args) + while runs < args['max_runs']: + for w in range(sweep_gpus): + args = worker_args[w] + if host_queues[w].empty(): + continue + + all_logs = host_queues[w].get(timeout=0) + if not all_logs: + continue + + all_logs = [e for e in all_logs if target_key in e] + scores = downsample([log[target_key] for log in all_logs], points_per_run) + times = downsample([log['uptime'] for log in all_logs], points_per_run) + steps = downsample([log['agent_steps'] for log in all_logs], points_per_run) + #costs = np.stack([times, steps], axis=1) + costs = times + timesteps = [log['agent_steps'] for log in all_logs] + timesteps = downsample(timesteps, points_per_run) + for score, cost, timestep in zip(scores, costs, timesteps): + args['train']['total_timesteps'] = timestep + sweeps[w].observe(args, score, 
def profile(args=None, env_name=None, vecenv=None, policy=None):
    '''Profile ten evaluate/train iterations with torch.profiler.

    Prints the top rows of the profiler table sorted by total CUDA time and
    writes a Chrome trace to ``trace.json`` in the working directory.

    Args:
        args: Parsed config; loaded with ``load_config(env_name)`` when omitted.
        env_name: Environment name used to load the config and environment.
        vecenv: Optional pre-built vectorized env.
        policy: Optional pre-built policy.
    '''
    # Previously this called load_config() with no env_name (a TypeError,
    # since load_config requires it) and ignored caller-supplied args.
    args = args or load_config(env_name)
    vecenv = vecenv or load_env(env_name, args)
    policy = policy or load_policy(args, vecenv)

    # Pick the logger the same way train() does. PuffeRL takes a logger
    # object, not neptune=/wandb= keyword flags.
    logger = None
    if args['neptune']:
        logger = NeptuneLogger(args)
    elif args['wandb']:
        logger = WandbLogger(args)

    train_config = dict(**args['train'], env=args['env_name'], tag=args['tag'])
    pufferl = PuffeRL(train_config, vecenv, policy, logger)

    # Aliased so the profiler context manager does not shadow this function
    from torch.profiler import profile as torch_profile
    from torch.profiler import record_function, ProfilerActivity
    activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
    with torch_profile(activities=activities, record_shapes=True) as prof:
        with record_function("model_inference"):
            for _ in range(10):
                pufferl.evaluate()
                pufferl.train()

    print(prof.key_averages().table(sort_by='cuda_time_total', row_limit=10))
    prof.export_chrome_trace("trace.json")
args['rnn_name'] + if rnn_name is not None: + rnn_cls = getattr(env_module.torch, args['rnn_name']) + policy = rnn_cls(vecenv.driver_env, policy, **args['policy']) + + policy = policy.to(device) + + load_id = args['load_id'] + if load_id is not None: + if args['neptune']: + path = NeptuneLogger(args, load_id, mode='read-only').download() + elif args['wandb']: + path = WandbLogger(args, load_id).download() + else: + raise pufferlib.APIUsageError('No run id provided for eval') + + state_dict = torch.load(path, map_location=device) + state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()} + policy.load_state_dict(state_dict) + + load_path = args['load_model_path'] + if load_path == 'latest': + load_path = max(glob.glob(f"experiments/{env_name}*.pt"), key=os.path.getctime) + + if load_path is not None: + state_dict = torch.load(load_path, map_location=device) + state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()} + policy.load_state_dict(state_dict) + #state_path = os.path.join(*load_path.split('/')[:-1], 'state.pt') + #optim_state = torch.load(state_path)['optimizer_state_dict'] + #pufferl.optimizer.load_state_dict(optim_state) + + return policy + +def load_config(env_name): + parser = argparse.ArgumentParser( + description=f':blowfish: PufferLib [bright_cyan]{pufferlib.__version__}[/]' + ' demo options. 
Shows valid args for your env and policy', + formatter_class=RichHelpFormatter, add_help=False) + parser.add_argument('--load-model-path', type=str, default=None, + help='Path to a pretrained checkpoint') + parser.add_argument('--load-id', type=str, + default=None, help='Kickstart/eval from from a finished Wandb/Neptune run') + parser.add_argument('--render-mode', type=str, default='auto', + choices=['auto', 'human', 'ansi', 'rgb_array', 'raylib', 'None']) + parser.add_argument('--save-frames', type=int, default=0) + parser.add_argument('--gif-path', type=str, default='eval.gif') + parser.add_argument('--fps', type=float, default=15) + parser.add_argument('--max-runs', type=int, default=1200, help='Max number of sweep runs') + parser.add_argument('--wandb', action='store_true', help='Use wandb for logging') + parser.add_argument('--wandb-project', type=str, default='pufferlib') + parser.add_argument('--wandb-group', type=str, default='debug') + parser.add_argument('--neptune', action='store_true', help='Use neptune for logging') + parser.add_argument('--neptune-name', type=str, default='pufferai') + parser.add_argument('--neptune-project', type=str, default='ablations') + parser.add_argument('--local-rank', type=int, default=0, help='Used by torchrun for DDP') + parser.add_argument('--sweep-gpus', type=int, default=-1, help='multigpu sweeps') + parser.add_argument('--tag', type=str, default=None, help='Tag for experiment') + args = parser.parse_known_args()[0] + + # Load defaults and config + puffer_dir = os.path.dirname(os.path.realpath(__file__)) + puffer_config_dir = os.path.join(puffer_dir, 'config/**/*.ini') + puffer_default_config = os.path.join(puffer_dir, 'config/default.ini') + if env_name == 'default': + p = configparser.ConfigParser() + p.read(puffer_default_config) + else: + for path in glob.glob(puffer_config_dir, recursive=True): + p = configparser.ConfigParser() + p.read([puffer_default_config, path]) + if env_name in p['base']['env_name'].split(): 
break + else: + raise pufferlib.APIUsageError('No config for env_name {}'.format(env_name)) + + # Dynamic help menu from config + def puffer_type(value): + try: + return ast.literal_eval(value) + except: + return value + + for section in p.sections(): + for key in p[section]: + fmt = f'--{key}' if section == 'base' else f'--{section}.{key}' + parser.add_argument( + fmt.replace('_', '-'), + default=puffer_type(p[section][key]), + type=puffer_type + ) + + parser.add_argument('-h', '--help', default=argparse.SUPPRESS, + action='help', help='Show this help message and exit') + + # Unpack to nested dict + parsed = vars(parser.parse_args()) + args = defaultdict(dict) + for key, value in parsed.items(): + next = args + for subkey in key.split('.'): + prev = next + next = next.setdefault(subkey, {}) + + prev[subkey] = value + + args['train']['use_rnn'] = args['rnn_name'] is not None + return args + +def main(): + err = 'Usage: puffer [train, eval, sweep, autotune, profile, export] [env_name] [optional args]. --help for more info' + if len(sys.argv) < 3: + raise pufferlib.APIUsageError(err) + + mode = sys.argv.pop(1) + env_name = sys.argv.pop(1) + if mode == 'train': + train(env_name=env_name) + elif mode == 'eval': + eval(env_name=env_name) + elif mode == 'sweep': + sweep(env_name=env_name) + elif mode == 'multisweep': + multisweep(env_name=env_name) + elif mode == 'paretosweep': + paretosweep(env_name=env_name) + elif mode == 'autotune': + autotune(env_name=env_name) + elif mode == 'profile': + profile(env_name=env_name) + elif mode == 'export': + export(env_name=env_name) + else: + raise pufferlib.APIUsageError(err) + +if __name__ == '__main__': + main() diff --git a/test_squared_torch.py b/test_squared_torch.py index 7b50e4473..8a503f0b2 100644 --- a/test_squared_torch.py +++ b/test_squared_torch.py @@ -1,3 +1,4 @@ + import torch import torch.utils.cpp_extension try: @@ -5,30 +6,88 @@ except ImportError: raise ImportError('Failed to import C/CUDA advantage kernel. 
If you have non-default PyTorch, try installing with --no-build-isolation') +# Note: In Python/CUDA interop via PyTorch, we'll use integers directly +NOOP = 0 +DOWN = 1 +UP = 2 +LEFT = 3 +RIGHT = 4 if __name__ == '__main__': # THIS IS HARDCODED IN CUDA. DO NOT CHANGE num_envs = 2048 - steps = 10000 grid_size = 9 dummy = torch.zeros(5).cuda() indices = torch.arange(num_envs).cuda().int() - envs, obs, actions, rewards, terminals = torch.ops.squared.create_squared_environments(num_envs, grid_size, dummy) - atns = torch.randint(0, 5, (num_envs,)).cuda() - actions[:] = atns - - torch.ops.squared.reset_environments(envs, indices) + envs, obs, actions, rewards, terminals = _C.create_squared_environments(num_envs, grid_size, dummy) + _C.reset_environments(envs, indices) import time start = time.time() torch.cuda.synchronize() + for i in range(steps): - torch.ops.squared.step_environments(envs) + # Get agent and goal positions from obs + agent_pos = torch.nonzero(obs == 1, as_tuple=False) # [N, 3] -> (env_idx, y, x) + goal_pos = torch.nonzero(obs == 2, as_tuple=False) # [N, 3] -> (env_idx, y, x) + + # Extract environment indices and coordinates + agent_envs = agent_pos[:, 0] + agent_y = agent_pos[:, 1] + agent_x = agent_pos[:, 2] + + goal_envs = goal_pos[:, 0] + goal_y = goal_pos[:, 1] + goal_x = goal_pos[:, 2] + + # Since both are sorted by env index, we can assume alignment + # But we need to map both to the same batch dimension (num_envs) + # Create tensors to hold coords per env + device = obs.device + full_agent_y = torch.zeros(num_envs, dtype=torch.long, device=device) + full_agent_x = torch.zeros(num_envs, dtype=torch.long, device=device) + full_goal_y = torch.zeros(num_envs, dtype=torch.long, device=device) + full_goal_x = torch.zeros(num_envs, dtype=torch.long, device=device) + + # Scatter the detected positions into full arrays + full_agent_y[agent_envs] = agent_y + full_agent_x[agent_envs] = agent_x + full_goal_y[goal_envs] = goal_y + full_goal_x[goal_envs] = 
goal_x + + # Now compute desired actions + move_y = full_goal_y - full_agent_y + move_x = full_goal_x - full_agent_x + + # Default action is NOOP + atns = torch.full((num_envs,), NOOP, dtype=torch.long, device=device) + + up_mask = move_y < 0 + down_mask = move_y > 0 + atns[up_mask] = UP + atns[down_mask] = DOWN + + noop_mask = move_y == 0 + left_mask = noop_mask & (move_x < 0) + right_mask = noop_mask & (move_x > 0) + atns[left_mask] = LEFT + atns[right_mask] = RIGHT + + # Assign actions + actions[:] = atns + + # Step environment + _C.step_environments(envs, num_envs) torch.cuda.synchronize() end = time.time() - print('Steps/sec:', num_envs * steps / (end - start)) - + logs = _C.log_environments(envs, indices) + print('perf', logs.perf) + print('score', logs.score) + print('episode_return', logs.episode_return) + print('episode_length', logs.episode_length) + print('n', logs.n) + print('Steps/sec:', num_envs * steps / (end - start)) From d1395f133c818cf24e98e508615da7481cee4232 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 28 Oct 2025 17:38:38 +0000 Subject: [PATCH 082/188] Reproducible net init --- pufferlib/extensions/pufferlib.cpp | 43 ++++++++------- pufferlib/models.py | 87 ++++++++++++++++++++++++++++++ pufferlib/pufferl.py | 27 ++++++---- pufferlib/python_pufferl.py | 2 + 4 files changed, 129 insertions(+), 30 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 1150a6e76..282ef17f9 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -132,36 +132,39 @@ class PolicyLSTM : public torch::nn::Module { PolicyLSTM(int64_t input_size, int64_t num_atns, int64_t hidden_size = 128) : input_size_(input_size), hidden_size_(hidden_size), num_atns_(num_atns) { + torch::globalContext().setDeterministicCuDNN(true); + torch::globalContext().setBenchmarkCuDNN(false); + torch::manual_seed(42); + torch::cuda::manual_seed(42); encoder = register_module("encoder", 
torch::nn::Sequential( torch::nn::Linear(input_size_, hidden_size_), torch::nn::GELU() )); + auto encoder_linear = (*encoder)[0]->as(); + torch::nn::init::orthogonal_(encoder_linear->weight, std::sqrt(2.0)); + torch::nn::init::constant_(encoder_linear->bias, 0.0); decoder = register_module("decoder", torch::nn::Linear(hidden_size_, num_atns_)); + torch::nn::init::orthogonal_(decoder->weight, 0.01); + torch::nn::init::constant_(decoder->bias, 0.0); value = register_module("value", torch::nn::Linear(hidden_size_, 1)); + torch::nn::init::orthogonal_(value->weight, 1.0); + torch::nn::init::constant_(value->bias, 0.0); lstm = register_module("lstm", torch::nn::LSTM(torch::nn::LSTMOptions(hidden_size_, hidden_size_).num_layers(1))); - - cell = register_module("cell", torch::nn::LSTMCell(hidden_size_, hidden_size_)); + torch::nn::init::orthogonal_(lstm->named_parameters()["weight_ih_l0"], 1.0); + torch::nn::init::orthogonal_(lstm->named_parameters()["weight_hh_l0"], 1.0); + lstm->named_parameters()["bias_ih_l0"].data().zero_(); + lstm->named_parameters()["bias_hh_l0"].data().zero_(); // Share weights between LSTM and LSTMCell + cell = register_module("cell", torch::nn::LSTMCell(hidden_size_, hidden_size_)); cell->named_parameters()["weight_ih"] = lstm->named_parameters()["weight_ih_l0"]; cell->named_parameters()["weight_hh"] = lstm->named_parameters()["weight_hh_l0"]; cell->named_parameters()["bias_ih"] = lstm->named_parameters()["bias_ih_l0"]; cell->named_parameters()["bias_hh"] = lstm->named_parameters()["bias_hh_l0"]; - // Initialization - auto encoder_linear = (*encoder)[0]->as(); - torch::nn::init::orthogonal_(encoder_linear->weight, std::sqrt(2.0)); - torch::nn::init::constant_(encoder_linear->bias, 0.0); - torch::nn::init::orthogonal_(decoder->weight, std::sqrt(0.01)); - torch::nn::init::constant_(decoder->bias, 0.0); - - torch::nn::init::orthogonal_(lstm->named_parameters()["weight_ih_l0"], 1.0); - 
torch::nn::init::orthogonal_(lstm->named_parameters()["weight_hh_l0"], 1.0); - lstm->named_parameters()["bias_ih_l0"].data().zero_(); - lstm->named_parameters()["bias_hh_l0"].data().zero_(); } // Forward for evaluation/inference (uses LSTMCell) @@ -280,19 +283,19 @@ std::unique_ptr create_pufferl(int64_t input_size, int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps, int64_t max_epochs) { // Enable cuDNN benchmarking - torch::globalContext().setBenchmarkCuDNN(true); - torch::globalContext().setDeterministicCuDNN(false); - torch::globalContext().setBenchmarkLimitCuDNN(32); + //torch::globalContext().setBenchmarkCuDNN(true); + //torch::globalContext().setDeterministicCuDNN(false); + //torch::globalContext().setBenchmarkLimitCuDNN(32); // Enable TF32 for faster FP32 math (uses Tensor Cores on 4090) - torch::globalContext().setAllowTF32CuBLAS(true); - torch::globalContext().setAllowTF32CuDNN(true); + //torch::globalContext().setAllowTF32CuBLAS(true); + //torch::globalContext().setAllowTF32CuDNN(true); // Enable faster FP16 reductions - torch::globalContext().setAllowFP16ReductionCuBLAS(true); + //torch::globalContext().setAllowFP16ReductionCuBLAS(true); // BF16 reduction (if using bfloat16) - torch::globalContext().setAllowBF16ReductionCuBLAS(true); + //torch::globalContext().setAllowBF16ReductionCuBLAS(true); // Random seed torch::manual_seed(42); diff --git a/pufferlib/models.py b/pufferlib/models.py index 2a1255cd7..ff3f572d7 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -38,6 +38,10 @@ def __init__(self, env, hidden_size=128): self.encoder = nn.Linear(input_size, self.hidden_size) else: num_obs = np.prod(env.single_observation_space.shape) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.manual_seed(42) + torch.cuda.manual_seed(42) self.encoder = torch.nn.Sequential( pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), nn.GELU(), @@ -61,6 +65,19 @@ def 
__init__(self, env, hidden_size=128): self.value = pufferlib.pytorch.layer_init( nn.Linear(hidden_size, 1), std=1) + self.lstm = nn.LSTM(hidden_size, hidden_size) + nn.init.orthogonal_(self.lstm.weight_ih_l0, 1.0) + nn.init.orthogonal_(self.lstm.weight_hh_l0, 1.0) + self.lstm.bias_ih_l0.data.zero_() + self.lstm.bias_hh_l0.data.zero_() + + self.cell = torch.nn.LSTMCell(hidden_size, hidden_size) + self.cell.weight_ih = self.lstm.weight_ih_l0 + self.cell.weight_hh = self.lstm.weight_hh_l0 + self.cell.bias_ih = self.lstm.bias_ih_l0 + self.cell.bias_hh = self.lstm.bias_hh_l0 + + def forward_eval(self, observations, state=None): hidden = self.encode_observations(observations, state=state) logits, values = self.decode_actions(hidden) @@ -96,6 +113,76 @@ def decode_actions(self, hidden): values = self.value(hidden) return logits, values + def forward_eval(self, observations, state): + '''Forward function for inference. 3x faster than using LSTM directly''' + hidden = self.encode_observations(observations, state=state) + h = state['lstm_h'] + c = state['lstm_c'] + + # TODO: Don't break compile + if h is not None: + assert h.shape[0] == c.shape[0] == observations.shape[0], 'LSTM state must be (h, c)' + lstm_state = (h, c) + else: + lstm_state = None + + #hidden = self.pre_layernorm(hidden) + hidden, c = self.cell(hidden, lstm_state) + #hidden = self.post_layernorm(hidden) + state['hidden'] = hidden + state['lstm_h'] = hidden + state['lstm_c'] = c + logits, values = self.decode_actions(hidden) + return logits, values + + def forward(self, observations, state): + '''Forward function for training. 
Uses LSTM for fast time-batching''' + x = observations + lstm_h = state['lstm_h'] + lstm_c = state['lstm_c'] + + x_shape, space_shape = x.shape, self.obs_shape + x_n, space_n = len(x_shape), len(space_shape) + if x_shape[-space_n:] != space_shape: + raise ValueError('Invalid input tensor shape', x.shape) + + if x_n == space_n + 1: + B, TT = x_shape[0], 1 + elif x_n == space_n + 2: + B, TT = x_shape[:2] + else: + raise ValueError('Invalid input tensor shape', x.shape) + + if lstm_h is not None: + assert lstm_h.shape[1] == lstm_c.shape[1] == B, 'LSTM state must be (h, c)' + lstm_state = (lstm_h, lstm_c) + else: + lstm_state = None + + x = x.reshape(B*TT, *space_shape) + hidden = self.encode_observations(x, state) + assert hidden.shape == (B*TT, self.input_size) + + hidden = hidden.reshape(B, TT, self.input_size) + + hidden = hidden.transpose(0, 1) + #hidden = self.pre_layernorm(hidden) + hidden, (lstm_h, lstm_c) = self.lstm.forward(hidden, lstm_state) + hidden = hidden.float() + + #hidden = self.post_layernorm(hidden) + hidden = hidden.transpose(0, 1) + + flat_hidden = hidden.reshape(B*TT, self.hidden_size) + logits, values = self.decode_actions(flat_hidden) + values = values.reshape(B, TT) + #state.batch_logits = logits.reshape(B, TT, -1) + state['hidden'] = hidden + state['lstm_h'] = lstm_h.detach() + state['lstm_c'] = lstm_c.detach() + return logits, values + + class LSTMWrapper(nn.Module): def __init__(self, env, policy, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index ff4d0e9b7..7597b1c24 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -92,12 +92,6 @@ def __init__(self, config, logger=None, verbose=True): torch.backends.cudnn.deterministic = config['torch_deterministic'] torch.backends.cudnn.benchmark = True - # Reproducibility - seed = config['seed'] - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - num_envs = 4096 
self.num_envs = num_envs grid_size = 11 @@ -105,6 +99,12 @@ def __init__(self, config, logger=None, verbose=True): vecenv = SquaredEnv(num_envs, grid_size) vecenv.reset() + # Reproducibility + seed = config['seed'] + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + from gymnasium.spaces import Box, Discrete obs_space = Box(low=-1, high=1, shape=(grid_size*grid_size,), dtype=np.float32) atn_space = Discrete(5) @@ -805,9 +805,12 @@ def download(self): def check(env_name): args = load_config(env_name) vecenv = load_env(env_name, args) + + torch.manual_seed(args['train']['seed']) policy = load_policy(args, vecenv, env_name) - train_config = dict(**args['train'], env=env_name) + import pufferlib.python_pufferl + train_config = dict(**args['train'], env=env_name) pufferl_python = pufferlib.python_pufferl.PuffeRL(train_config, vecenv, policy) # TODO: remember to set seet again before this @@ -817,10 +820,13 @@ def check(env_name): pufferl_cpp = PuffeRL(train_config) python_params = dict(policy.named_parameters()) for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): - v_python = python_params[f'policy.{k}'].data + # For some reason, cpp records twice + if 'cell' in k: + continue + + v_python = python_params[k].data assert torch.allclose(v, v_python), k - breakpoint() pufferl_cpp.evaluate() pufferl_cpp.train() @@ -1241,11 +1247,12 @@ def load_policy(args, vecenv, env_name=''): policy_cls = getattr(env_module.torch, args['policy_name']) policy = policy_cls(vecenv.driver_env, **args['policy']) + ''' rnn_name = args['rnn_name'] if rnn_name is not None: rnn_cls = getattr(env_module.torch, args['rnn_name']) policy = rnn_cls(vecenv.driver_env, policy, **args['policy']) - + ''' policy = policy.to(device) load_id = args['load_id'] diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index d251a1fa5..8a771204e 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -1291,10 +1291,12 @@ def load_policy(args, 
vecenv, env_name=''): policy_cls = getattr(env_module.torch, args['policy_name']) policy = policy_cls(vecenv.driver_env, **args['policy']) + ''' rnn_name = args['rnn_name'] if rnn_name is not None: rnn_cls = getattr(env_module.torch, args['rnn_name']) policy = rnn_cls(vecenv.driver_env, policy, **args['policy']) + ''' policy = policy.to(device) From c0b6cf5e0b04c17c794add5a37e6d1a978569edb Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 30 Oct 2025 12:50:29 +0000 Subject: [PATCH 083/188] almost there! --- pufferlib/extensions/cuda/squared_torch.cu | 70 +++++++++------------- pufferlib/extensions/pufferlib.cpp | 68 +++++++++++++++++---- pufferlib/models.py | 13 ++-- pufferlib/pufferl.py | 69 ++++++++++++++------- test_squared_torch.py | 2 +- 5 files changed, 140 insertions(+), 82 deletions(-) diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu index 9fd384a0a..8f137a30b 100644 --- a/pufferlib/extensions/cuda/squared_torch.cu +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -39,31 +39,29 @@ struct __align__(16) Squared { int padding[3]; }; +__device__ void add_log(Squared* env) { + env->log.perf += (env->rewards[0] > 0) ? 
1 : 0; + env->log.score += env->rewards[0]; + env->log.episode_length += env->tick; + env->log.episode_return += env->rewards[0]; + env->log.n++; +} + // Device: Reset environment __device__ void cuda_reset(Squared* env, curandState* rng) { int tiles = env->size * env->size; - int center = env->size / 2 * env->size + env->size / 2; - - // Clear grid for (int i = 0; i < tiles; i++) { env->observations[i] = EMPTY; } - - // Place agent at center - env->observations[center] = AGENT; - env->r = env->size / 2; - env->c = env->size / 2; + env->observations[tiles/2] = AGENT; + env->r = env->size/2; + env->c = env->size/2; env->tick = 0; - - // Place target randomly (not on agent) - int target_idx = center; // Deterministic for testing - /* + int target_idx = 0; // Deterministic for testing do { target_idx = curand(rng) % tiles; - } while (target_idx == center); - */ - + } while (target_idx == tiles/2); env->observations[target_idx] = TARGET; } @@ -75,9 +73,8 @@ __device__ void cuda_step(Squared* env) { env->rewards[0] = 0.0f; int pos = env->r * env->size + env->c; - env->observations[pos] = EMPTY; // Clear old agent pos + env->observations[pos] = EMPTY; - // Move agent if (action == DOWN) { env->r += 1; } else if (action == UP) { @@ -88,41 +85,33 @@ __device__ void cuda_step(Squared* env) { env->c -= 1; } - pos = env->r * env->size + env->c; - // Check bounds and timeout - if (env->r < 0 || env->c < 0 || env->r >= env->size - || env->c >= env->size || env->tick > 3 * env->size) { + if (env->tick > 3 * env->size + || env->r < 0 + || env->c < 0 + || env->r >= env->size + || env->c >= env->size) { env->terminals[0] = 1; env->rewards[0] = -1.0f; - env->log.perf += 0; - env->log.score += -1.0f; - env->log.episode_return += -1.0f; - env->log.episode_length += env->tick; - env->log.n += 1; + add_log(env); cuda_reset(env, &env->rng); return; } - // Check if reached target + pos = env->r*env->size + env->c; if (env->observations[pos] == TARGET) { env->terminals[0] = 1; 
env->rewards[0] = 1.0f; - env->log.perf += 1; - env->log.score += 1.0f; - env->log.episode_return += 1.0f; - env->log.episode_length += env->tick; - env->log.n += 1; + add_log(env); cuda_reset(env, &env->rng); return; } - // Place agent env->observations[pos] = AGENT; } // Kernel: Step all environments -__global__ void step_environments(Squared* envs, int num_envs) { +__global__ void step_environments(Squared* envs, int* indices, int num_envs) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx >= num_envs) return; cuda_step(&envs[idx]); @@ -169,11 +158,7 @@ __global__ void init_environments(Squared* envs, env->actions = actions_mem + idx; env->rewards = rewards_mem + idx; env->terminals = terminals_mem + idx; - env->size = grid_size; - env->tick = 0; - env->r = grid_size / 2; - env->c = grid_size / 2; // Initialize RNG curand_init(clock64(), idx, 0, &env->rng); @@ -214,9 +199,11 @@ create_squared_environments(int64_t num_envs, int64_t grid_size, torch::Tensor d return std::make_tuple(envs_tensor, obs, actions, rewards, terminals); } -void step_environments_cuda(torch::Tensor envs_tensor, int64_t num_envs) { +void step_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_tensor) { Squared* envs = reinterpret_cast(envs_tensor.data_ptr()); - step_environments<<>>(envs, num_envs); + auto indices = indices_tensor.data_ptr(); + int num_envs = indices_tensor.size(0); + step_environments<<>>(envs, indices, num_envs); cudaDeviceSynchronize(); } @@ -231,7 +218,8 @@ void reset_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_te Log log_environments_cuda(torch::Tensor envs_tensor, torch::Tensor indices_tensor) { Squared* envs = reinterpret_cast(envs_tensor.cpu().data_ptr()); - auto indices = indices_tensor.cpu().data_ptr(); + torch::Tensor cpu_indices = indices_tensor.cpu(); + auto indices = cpu_indices.data_ptr(); int num_log = indices_tensor.size(0); Log log = {0}; for (int i=0; inamed_parameters()["bias_ih_l0"].data().zero_(); 
lstm->named_parameters()["bias_hh_l0"].data().zero_(); - // Share weights between LSTM and LSTMCell - cell = register_module("cell", torch::nn::LSTMCell(hidden_size_, hidden_size_)); - cell->named_parameters()["weight_ih"] = lstm->named_parameters()["weight_ih_l0"]; - cell->named_parameters()["weight_hh"] = lstm->named_parameters()["weight_hh_l0"]; - cell->named_parameters()["bias_ih"] = lstm->named_parameters()["bias_ih_l0"]; - cell->named_parameters()["bias_hh"] = lstm->named_parameters()["bias_hh_l0"]; - + // Share weights between LSTM and LSTMCell. Do not register or you'll double-update during optim. + cell = torch::nn::LSTMCell(hidden_size_, hidden_size_); + cell->named_parameters()["weight_ih"].data() = lstm->named_parameters()["weight_ih_l0"].data(); + cell->named_parameters()["weight_hh"].data() = lstm->named_parameters()["weight_hh_l0"].data(); + cell->named_parameters()["bias_ih"].data() = lstm->named_parameters()["bias_ih_l0"].data(); + cell->named_parameters()["bias_hh"].data() = lstm->named_parameters()["bias_hh_l0"].data(); + cell->to(torch::kCUDA); } // Forward for evaluation/inference (uses LSTMCell) @@ -195,6 +195,10 @@ class PolicyLSTM : public torch::nn::Module { auto hidden_out = std::get<0>(cell_out); auto c_out = std::get<1>(cell_out); + //std::std::cout << std::fixed << std::setprecision(10); + //std::std::cout << "Hidden 0 cpp: " << hidden_out[0][0].item() << std::std::endl; + + auto logits = decoder->forward(hidden_out); auto values = value->forward(hidden_out); @@ -275,6 +279,7 @@ typedef struct { PolicyLSTM* policy_16; PolicyLSTM* policy_32; torch::optim::Adam* optimizer; + //torch::optim::SGD* optimizer; double lr; int64_t max_epochs; } PuffeRL; @@ -308,6 +313,7 @@ std::unique_ptr create_pufferl(int64_t input_size, policy_32->to(torch::kCUDA); auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); + //auto optimizer = new torch::optim::SGD(policy_32->parameters(), 
torch::optim::SGDOptions(lr)); auto pufferl = std::make_unique(); pufferl->policy_16 = policy_16; @@ -322,6 +328,7 @@ std::unique_ptr create_pufferl(int64_t input_size, std::tuple compiled_evaluate( pybind11::object pufferl_obj, torch::Tensor envs_tensor, + torch::Tensor indices_tensor, torch::Tensor obs, torch::Tensor actions, torch::Tensor rewards, @@ -362,7 +369,7 @@ std::tuple compiled_evaluate( actions.copy_(action); { pybind11::gil_scoped_release no_gil; - step_environments_cuda(envs_tensor, num_envs); + step_environments_cuda(envs_tensor, indices_tensor); } rewards.clamp_(-1.0f, 1.0f); } @@ -429,24 +436,27 @@ pybind11::dict compiled_train( torch::Tensor clipfrac_sum = torch::zeros({}, torch::kFloat32).to(device); torch::Tensor importance_sum = torch::zeros({}, torch::kFloat32).to(device); - optimizer->zero_grad(); // Start with clean grads auto advantages = torch::zeros_like(values); for (int64_t mb = 0; mb < total_minibatches; ++mb) { + //for (int64_t mb = 0; mb < 1; ++mb) { advantages = torch::zeros_like(values); compute_puff_advantage_cuda( values, rewards, terminals, ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip ); + //std::cout << "Adv: " << advantages.mean().item() << std::endl; // Prioritization auto adv = advantages.abs().sum(1); // [num_envs] auto prio_weights = adv.pow(prio_alpha).nan_to_num_(0.0, 0.0, 0.0); auto prio_probs = (prio_weights + 1e-6)/(prio_weights.sum() + 1e-6); - auto idx = at::multinomial(prio_probs, minibatch_segments); // #Replacement? 
+ auto idx = at::multinomial(prio_probs, minibatch_segments, true); auto mb_prio = torch::pow(segments*prio_probs.index_select(0, idx).unsqueeze(1), -anneal_beta); + //std::cout << "Prio: " << mb_prio.mean().item() << std::endl; + // Index into data torch::Tensor mb_obs = observations.index_select(0, idx); torch::Tensor mb_actions = actions.index_select(0, idx); @@ -455,6 +465,15 @@ pybind11::dict compiled_train( torch::Tensor mb_advantages = advantages.index_select(0, idx); torch::Tensor mb_returns = mb_advantages + mb_values; + /* + std::cout << "mb_obs: " << mb_obs.sum().item() << std::endl; + std::cout << "mb_actions: " << mb_actions.sum().item() << std::endl; + std::cout << "mb_logprobs: " << mb_logprobs.mean().item() << std::endl; + std::cout << "mb_values: " << mb_values.mean().item() << std::endl; + std::cout << "mb_advantages: " << mb_advantages.mean().item() << std::endl; + std::cout << "mb_returns: " << mb_returns.mean().item() << std::endl; + */ + // Reshape obs if not using RNN if (!use_rnn) { auto flat_shape = std::vector{-1, mb_obs.size(2), mb_obs.size(3)}; @@ -472,6 +491,8 @@ pybind11::dict compiled_train( // Forward pass auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32), mb_lstm_h, mb_lstm_c); + //std::cout << "logits: " << logits.mean().item() << std::endl; + // Flatten for action lookup auto flat_logits = logits.reshape({-1, logits.size(-1)}); auto flat_actions = mb_actions.reshape({-1}); @@ -483,10 +504,15 @@ pybind11::dict compiled_train( auto newlogprob = newlogprob_flat.reshape({minibatch_segments, horizon}); auto entropy = - (probs_new * logprobs_new).sum(1).mean(); // mean over batch + //std::cout << "newlogprob: " << newlogprob.mean().item() << std::endl; + //std::cout << "entropy: " << entropy << std::endl; + // Compute ratio auto logratio = newlogprob - mb_logprobs; auto ratio_new = logratio.exp(); + //std::cout << "ratio_new: " << std::fixed << std::setprecision(20) << ratio_new.min().item() << std::endl; + 
// Update global ratio and values in-place (matches Python) ratio.index_copy_(0, idx, ratio_new.detach().squeeze(-1).to(torch::kFloat32)); values.index_copy_(0, idx, newvalue.detach().squeeze(-1).to(torch::kFloat32)); @@ -495,11 +521,15 @@ pybind11::dict compiled_train( auto adv_normalized = mb_advantages; adv_normalized = mb_prio * (adv_normalized - adv_normalized.mean()) / (adv_normalized.std() + 1e-8); + //std::cout << "adv_normalized: " << std::fixed << std::setprecision(20) << adv_normalized.mean().item() << std::endl; + // Policy loss auto pg_loss1 = -adv_normalized * ratio_new; auto pg_loss2 = -adv_normalized * torch::clamp(ratio_new, 1.0 - clip_coef, 1.0 + clip_coef); auto pg_loss = torch::max(pg_loss1, pg_loss2).mean(); + //std::cout << "pg_loss: " << pg_loss << std::endl; + // Value loss newvalue = newvalue.view(mb_returns.sizes()); auto v_clipped = mb_values + torch::clamp(newvalue - mb_values, -vf_clip_coef, vf_clip_coef); @@ -507,12 +537,16 @@ pybind11::dict compiled_train( auto v_loss_clipped = (v_clipped - mb_returns).pow(2); auto v_loss = 0.5 * torch::max(v_loss_unclipped, v_loss_clipped).mean(); + //std::cout << "v_loss: " << v_loss << std::endl; + // Entropy auto entropy_loss = entropy; // Already mean // Total loss auto loss = pg_loss + vf_coef*v_loss - ent_coef*entropy_loss; + //std::cout << "loss: " << loss << std::endl; + // Accumulate stats pg_sum += pg_loss.detach(); v_sum += v_loss.detach(); @@ -542,6 +576,18 @@ pybind11::dict compiled_train( // Gradient accumulation and step if ((mb + 1) % accumulate_minibatches == 0) { torch::nn::utils::clip_grad_norm_(policy_32->parameters(), max_grad_norm); + + /* + // Print grads + for (auto& param : policy_32->parameters()) { + std::cout << param.grad().abs().sum() << std::endl; + } + */ + + // Print current lr + //std::cout << "Current lr: " + // << optimizer->param_groups()[0].options().get_lr() + // << std::endl; optimizer->step(); optimizer->zero_grad(); } diff --git a/pufferlib/models.py 
b/pufferlib/models.py index ff3f572d7..3eebc86ee 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -27,6 +27,9 @@ def __init__(self, env, hidden_size=128): pufferlib.spaces.MultiDiscrete) self.is_continuous = isinstance(env.single_action_space, pufferlib.spaces.Box) + + self.input_size = hidden_size + self.obs_shape = env.single_observation_space.shape try: self.is_dict_obs = isinstance(env.env.observation_space, pufferlib.spaces.Dict) except: @@ -76,16 +79,7 @@ def __init__(self, env, hidden_size=128): self.cell.weight_hh = self.lstm.weight_hh_l0 self.cell.bias_ih = self.lstm.bias_ih_l0 self.cell.bias_hh = self.lstm.bias_hh_l0 - - def forward_eval(self, observations, state=None): - hidden = self.encode_observations(observations, state=state) - logits, values = self.decode_actions(hidden) - return logits, values - - def forward(self, observations, state=None): - return self.forward_eval(observations, state) - def encode_observations(self, observations, state=None): '''Encodes a batch of observations into hidden states. 
Assumes no time dimension (handled by LSTM wrappers).''' @@ -128,6 +122,7 @@ def forward_eval(self, observations, state): #hidden = self.pre_layernorm(hidden) hidden, c = self.cell(hidden, lstm_state) + #print('Hidden 0 python:', hidden[0, 0]) #hidden = self.post_layernorm(hidden) state['hidden'] = hidden state['lstm_h'] = hidden diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 7597b1c24..6faa14026 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -88,9 +88,11 @@ class PuffeRL: #def __init__(self, config, policy, logger=None, verbose=True): def __init__(self, config, logger=None, verbose=True): # Backend perf optimization - torch.set_float32_matmul_precision('high') - torch.backends.cudnn.deterministic = config['torch_deterministic'] - torch.backends.cudnn.benchmark = True + #torch.set_float32_matmul_precision('high') + torch.backends.cudnn.deterministic = True #config['torch_deterministic'] + torch.backends.cudnn.benchmark = False #True + torch.backends.cuda.matmul.allow_tf32 = False + torch.backends.cudnn.allow_tf32 = False num_envs = 4096 self.num_envs = num_envs @@ -257,6 +259,7 @@ def evaluate(self): lstm_h, lstm_c = _C.compiled_evaluate( self.pufferl_cpp, self.vecenv.env, + self.vecenv.indices, self.vecenv.observations, self.vecenv.actions, self.vecenv.rewards, @@ -273,6 +276,7 @@ def evaluate(self): self.num_envs ) + torch.cuda.synchronize() logs = self.vecenv.log() self.stats['perf'] = [logs.perf] self.stats['score'] = [logs.score] @@ -806,42 +810,67 @@ def check(env_name): args = load_config(env_name) vecenv = load_env(env_name, args) + args['train']['optimizer'] = 'adam' + + torch.set_printoptions(precision=10) torch.manual_seed(args['train']['seed']) policy = load_policy(args, vecenv, env_name) import pufferlib.python_pufferl train_config = dict(**args['train'], env=env_name) - pufferl_python = pufferlib.python_pufferl.PuffeRL(train_config, vecenv, policy) - - # TODO: remember to set seet again before this - 
#pufferl_python.evaluate() - #pufferl_python.train() + pufferl_python = pufferlib.python_pufferl.PuffeRL(train_config, vecenv, policy, verbose=False) - pufferl_cpp = PuffeRL(train_config) + pufferl_cpp = PuffeRL(train_config, verbose=False) python_params = dict(policy.named_parameters()) for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): # For some reason, cpp records twice - if 'cell' in k: - continue + if k == 'cell.weight_ih': + v_python = python_params['lstm.weight_ih_l0'].data + elif k == 'cell.weight_hh': + v_python = python_params['lstm.weight_hh_l0'].data + elif k == 'cell.bias_ih': + v_python = python_params['lstm.bias_ih_l0'].data + elif k == 'cell.bias_hh': + v_python = python_params['lstm.bias_hh_l0'].data + else: + v_python = python_params[k].data - v_python = python_params[k].data + print(k, v.view(-1)[0]) assert torch.allclose(v, v_python), k + torch.manual_seed(args['train']['seed']) + pufferl_python.evaluate() + pufferl_python.train() + + torch.manual_seed(args['train']['seed']) pufferl_cpp.evaluate() pufferl_cpp.train() - for i in range(pufferl_python.observations.shape[1]): + for i in range(args['train']['bptt_horizon']): assert torch.allclose(pufferl_python.observations[:, i].float(), pufferl_cpp.observations[:, i]), f'Observation {i} mismatch' + assert torch.allclose(pufferl_python.actions[:, i], pufferl_cpp.actions[:, i]), f'Action {i} mismatch' + assert torch.allclose(pufferl_python.rewards[:, i], pufferl_cpp.rewards[:, i]), f'Reward {i} mismatch' + assert torch.allclose(pufferl_python.terminals[:, i], pufferl_cpp.terminals[:, i]), f'Terminal {i} mismatch' + assert torch.allclose(pufferl_python.logprobs[:, i], pufferl_cpp.logprobs[:, i]), f'Logprob {i} mismatch' + assert torch.allclose(pufferl_python.values[:, i], pufferl_cpp.values[:, i]), f'Value {i} mismatch' - assert torch.allclose(pufferl_python.observations.float(), pufferl_cpp.observations) - assert torch.allclose(pufferl_python.actions, pufferl_cpp.actions) - assert 
torch.allclose(pufferl_python.rewards, pufferl_cpp.rewards) - assert torch.allclose(pufferl_python.terminals, pufferl_cpp.terminals) - assert torch.allclose(pufferl_python.values, pufferl_cpp.values) + python_params = dict(policy.named_parameters()) + for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): + # For some reason, cpp records twice + if k == 'cell.weight_ih': + v_python = python_params['lstm.weight_ih_l0'].data + elif k == 'cell.weight_hh': + v_python = python_params['lstm.weight_hh_l0'].data + elif k == 'cell.bias_ih': + v_python = python_params['lstm.bias_ih_l0'].data + elif k == 'cell.bias_hh': + v_python = python_params['lstm.bias_hh_l0'].data + else: + v_python = python_params[k].data - for k, v in policy.named_parameters(): - assert torch.allclose(v, pufferl_cpp.policy.named_parameters()[k].data) + print(k, v.view(-1)[0]) + assert torch.allclose(v, v_python), k print('Check passed') diff --git a/test_squared_torch.py b/test_squared_torch.py index 8a503f0b2..1418bf9bc 100644 --- a/test_squared_torch.py +++ b/test_squared_torch.py @@ -78,7 +78,7 @@ actions[:] = atns # Step environment - _C.step_environments(envs, num_envs) + _C.step_environments(envs, indices) torch.cuda.synchronize() end = time.time() From eeba3eda08839827581bd5c83e4e58ce8616b10f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 30 Oct 2025 19:29:31 +0000 Subject: [PATCH 084/188] Assert check passes --- pufferlib/config/ocean/squared.ini | 5 +- pufferlib/extensions/cuda/squared_torch.cu | 2 + pufferlib/extensions/pufferlib.cpp | 105 +++++++++++++++------ pufferlib/ocean/squared/squared.h | 2 +- pufferlib/pufferl.py | 30 +++++- pufferlib/python_pufferl.py | 40 ++++++++ 6 files changed, 149 insertions(+), 35 deletions(-) diff --git a/pufferlib/config/ocean/squared.ini b/pufferlib/config/ocean/squared.ini index d13c3e9f9..d6f8e7ce7 100644 --- a/pufferlib/config/ocean/squared.ini +++ b/pufferlib/config/ocean/squared.ini @@ -14,7 +14,8 @@ hidden_size = 128 num_envs = 
4096 [train] -total_timesteps = 20_000_000 -gamma = 0.95 +optimizer = adam +total_timesteps = 200_000_000 +gamma = 0.99 learning_rate = 0.01 minibatch_size = 32768 diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu index 8f137a30b..cd3673a33 100644 --- a/pufferlib/extensions/cuda/squared_torch.cu +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -59,9 +59,11 @@ __device__ void cuda_reset(Squared* env, curandState* rng) { env->c = env->size/2; env->tick = 0; int target_idx = 0; // Deterministic for testing + /* do { target_idx = curand(rng) % tiles; } while (target_idx == tiles/2); + */ env->observations[target_idx] = TARGET; } diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index fc8817bee..f3431d304 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -164,7 +164,7 @@ class PolicyLSTM : public torch::nn::Module { cell->named_parameters()["weight_hh"].data() = lstm->named_parameters()["weight_hh_l0"].data(); cell->named_parameters()["bias_ih"].data() = lstm->named_parameters()["bias_ih_l0"].data(); cell->named_parameters()["bias_hh"].data() = lstm->named_parameters()["bias_hh_l0"].data(); - cell->to(torch::kCUDA); + //cell->to(torch::kCUDA); } // Forward for evaluation/inference (uses LSTMCell) @@ -278,8 +278,8 @@ void sync_fp16_fp32(pufferlib::PolicyLSTM* policy_16, pufferlib::PolicyLSTM* pol typedef struct { PolicyLSTM* policy_16; PolicyLSTM* policy_32; - torch::optim::Adam* optimizer; - //torch::optim::SGD* optimizer; + //torch::optim::Adam* optimizer; + torch::optim::SGD* optimizer; double lr; int64_t max_epochs; } PuffeRL; @@ -306,14 +306,14 @@ std::unique_ptr create_pufferl(int64_t input_size, torch::manual_seed(42); auto policy_16 = new PolicyLSTM(input_size, num_atns, hidden_size); - policy_16->to(torch::kCUDA); + //policy_16->to(torch::kCUDA); policy_16->to(torch::kFloat32); auto policy_32 = new PolicyLSTM(input_size, num_atns, 
hidden_size); - policy_32->to(torch::kCUDA); + //policy_32->to(torch::kCUDA); - auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); - //auto optimizer = new torch::optim::SGD(policy_32->parameters(), torch::optim::SGDOptions(lr)); + //auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); + auto optimizer = new torch::optim::SGD(policy_32->parameters(), torch::optim::SGDOptions(lr)); auto pufferl = std::make_unique(); pufferl->policy_16 = policy_16; @@ -350,7 +350,7 @@ std::tuple compiled_evaluate( torch::NoGradGuard no_grad; for (int64_t i = 0; i < horizon; ++i) { - auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32), lstm_h, lstm_c); + auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32).to(torch::kCPU), lstm_h, lstm_c); lstm_h = lstm_h_out; lstm_c = lstm_c_out; @@ -359,14 +359,14 @@ std::tuple compiled_evaluate( auto logprob = logprobs.gather(1, action.unsqueeze(1)).squeeze(1); // Store - obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32)); - act_buffer.select(1, i).copy_(action.to(torch::kInt32)); - logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32)); - rew_buffer.select(1, i).copy_(rewards.to(torch::kFloat32)); - term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32)); - val_buffer.select(1, i).copy_(value.flatten().to(torch::kFloat32)); - - actions.copy_(action); + obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32).to(torch::kCPU)); + act_buffer.select(1, i).copy_(action.to(torch::kInt32).to(torch::kCPU)); + logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32).to(torch::kCPU)); + rew_buffer.select(1, i).copy_(rewards.to(torch::kFloat32).to(torch::kCPU)); + term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32).to(torch::kCPU)); + val_buffer.select(1, 
i).copy_(value.flatten().to(torch::kFloat32).to(torch::kCPU)); + + actions.copy_(action.to(torch::kCUDA)); { pybind11::gil_scoped_release no_gil; step_environments_cuda(envs_tensor, indices_tensor); @@ -377,6 +377,49 @@ std::tuple compiled_evaluate( return std::make_tuple(lstm_h, lstm_c); } +std::tuple evaluate_step( + pybind11::object pufferl_obj, + torch::Tensor envs_tensor, + torch::Tensor indices_tensor, + torch::Tensor obs, + torch::Tensor actions, + torch::Tensor rewards, + torch::Tensor terminals, + torch::Tensor lstm_h, + torch::Tensor lstm_c, + torch::Tensor obs_buffer, + torch::Tensor act_buffer, + torch::Tensor logprob_buffer, + torch::Tensor rew_buffer, + torch::Tensor term_buffer, + torch::Tensor val_buffer, + int64_t i +) { + auto& pufferl = pufferl_obj.cast(); + auto& policy = pufferl.policy_32; + + torch::NoGradGuard no_grad; + + auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32), lstm_h, lstm_c); + lstm_h = lstm_h_out; + lstm_c = lstm_c_out; + + auto logprobs = torch::log_softmax(logits, 1); + auto action = at::multinomial(logprobs.exp(), 1, true).squeeze(1); + auto logprob = logprobs.gather(1, action.unsqueeze(1)).squeeze(1); + + // Store + obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32)); + act_buffer.select(1, i).copy_(action.to(torch::kInt32)); + logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32)); + rew_buffer.select(1, i).copy_(rewards.to(torch::kFloat32)); + term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32)); + val_buffer.select(1, i).copy_(value.flatten().to(torch::kFloat32)); + + actions.copy_(action); + return std::make_tuple(lstm_h, lstm_c); +} + pybind11::dict compiled_train( pybind11::object pufferl_obj, torch::Tensor observations, // [num_envs, horizon, ...] 
@@ -441,7 +484,7 @@ pybind11::dict compiled_train( for (int64_t mb = 0; mb < total_minibatches; ++mb) { //for (int64_t mb = 0; mb < 1; ++mb) { advantages = torch::zeros_like(values); - compute_puff_advantage_cuda( + compute_puff_advantage_cpu( values, rewards, terminals, ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip @@ -466,12 +509,12 @@ pybind11::dict compiled_train( torch::Tensor mb_returns = mb_advantages + mb_values; /* - std::cout << "mb_obs: " << mb_obs.sum().item() << std::endl; - std::cout << "mb_actions: " << mb_actions.sum().item() << std::endl; - std::cout << "mb_logprobs: " << mb_logprobs.mean().item() << std::endl; - std::cout << "mb_values: " << mb_values.mean().item() << std::endl; - std::cout << "mb_advantages: " << mb_advantages.mean().item() << std::endl; - std::cout << "mb_returns: " << mb_returns.mean().item() << std::endl; + std::cout << "mb_obs: " << mb_obs.sum() << std::endl; + std::cout << "mb_actions: " << mb_actions.sum() << std::endl; + std::cout << "mb_logprobs: " << mb_logprobs.min() << std::endl; + std::cout << "mb_values: " << mb_values.min() << std::endl; + std::cout << "mb_advantages: " << mb_advantages.min() << std::endl; + std::cout << "mb_returns: " << mb_returns.min() << std::endl; */ // Reshape obs if not using RNN @@ -489,9 +532,9 @@ pybind11::dict compiled_train( torch::Tensor mb_lstm_c = torch::zeros_like(mb_lstm_h); // Forward pass - auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32), mb_lstm_h, mb_lstm_c); + auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32).to(torch::kCPU), mb_lstm_h, mb_lstm_c); - //std::cout << "logits: " << logits.mean().item() << std::endl; + //std::cout << "logits: " << logits.mean() << std::endl; // Flatten for action lookup auto flat_logits = logits.reshape({-1, logits.size(-1)}); @@ -502,15 +545,18 @@ pybind11::dict compiled_train( // Gather logprobs for taken actions auto newlogprob_flat = logprobs_new.gather(1, 
flat_actions.unsqueeze(1)).squeeze(1); auto newlogprob = newlogprob_flat.reshape({minibatch_segments, horizon}); - auto entropy = - (probs_new * logprobs_new).sum(1).mean(); // mean over batch + auto entropy = - (probs_new * logprobs_new).sum(1); + + //std::cout << "newlogprob: " << newlogprob.min() << std::endl; + //std::cout << "entropy: " << entropy.min() << std::endl; - //std::cout << "newlogprob: " << newlogprob.mean().item() << std::endl; - //std::cout << "entropy: " << entropy << std::endl; + entropy = entropy.mean(); // Compute ratio auto logratio = newlogprob - mb_logprobs; auto ratio_new = logratio.exp(); + //std::cout << "logratio_new: " << std::fixed << std::setprecision(20) << logratio.min().item() << std::endl; //std::cout << "ratio_new: " << std::fixed << std::setprecision(20) << ratio_new.min().item() << std::endl; // Update global ratio and values in-place (matches Python) @@ -521,7 +567,7 @@ pybind11::dict compiled_train( auto adv_normalized = mb_advantages; adv_normalized = mb_prio * (adv_normalized - adv_normalized.mean()) / (adv_normalized.std() + 1e-8); - //std::cout << "adv_normalized: " << std::fixed << std::setprecision(20) << adv_normalized.mean().item() << std::endl; + //std::cout << "adv_normalized: " << std::fixed << std::setprecision(20) << adv_normalized.min().item() << std::endl; // Policy loss auto pg_loss1 = -adv_normalized * ratio_new; @@ -622,6 +668,7 @@ PYBIND11_MODULE(_C, m) { m.def("reset_environments", &reset_environments_cuda); m.def("log_environments", &log_environments_cuda); m.def("compiled_evaluate", &compiled_evaluate); + m.def("evaluate_step", &evaluate_step); m.def("compiled_train", &compiled_train); py::class_(m, "Log") diff --git a/pufferlib/ocean/squared/squared.h b/pufferlib/ocean/squared/squared.h index be57ea75c..4d4d5d0c5 100644 --- a/pufferlib/ocean/squared/squared.h +++ b/pufferlib/ocean/squared/squared.h @@ -58,7 +58,7 @@ void c_reset(Squared* env) { env->r = env->size/2; env->c = env->size/2; env->tick = 0; 
- int target_idx = tiles/2; // Deterministic for testing + int target_idx = 0; // Deterministic for testing /* do { target_idx = rand() % tiles; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 6faa14026..c4a83ef64 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -256,6 +256,30 @@ def evaluate(self): lstm_h = torch.zeros((n, h), device=device) lstm_c = torch.zeros((n, h), device=device) + ''' + for t in range(self.config['bptt_horizon']): + lstm_h, lstm_c = _C.evaluate_step( + self.pufferl_cpp, + self.vecenv.env, + self.vecenv.indices, + self.vecenv.observations, + self.vecenv.actions, + self.vecenv.rewards, + self.vecenv.terminals, + lstm_h, + lstm_c, + self.observations, + self.actions, + self.logprobs, + self.rewards, + self.terminals, + self.values, + t + ) + self.rewards[:, t].clamp_(-1.0, 1.0) + self.vecenv.step(self.actions[:, t]) + ''' + lstm_h, lstm_c = _C.compiled_evaluate( self.pufferl_cpp, self.vecenv.env, @@ -812,7 +836,7 @@ def check(env_name): args['train']['optimizer'] = 'adam' - torch.set_printoptions(precision=10) + torch.set_printoptions(precision=16) torch.manual_seed(args['train']['seed']) policy = load_policy(args, vecenv, env_name) @@ -853,7 +877,7 @@ def check(env_name): assert torch.allclose(pufferl_python.rewards[:, i], pufferl_cpp.rewards[:, i]), f'Reward {i} mismatch' assert torch.allclose(pufferl_python.terminals[:, i], pufferl_cpp.terminals[:, i]), f'Terminal {i} mismatch' assert torch.allclose(pufferl_python.logprobs[:, i], pufferl_cpp.logprobs[:, i]), f'Logprob {i} mismatch' - assert torch.allclose(pufferl_python.values[:, i], pufferl_cpp.values[:, i]), f'Value {i} mismatch' + assert torch.allclose(pufferl_python.values[:, i], pufferl_cpp.values[:, i], atol=1e-5), f'Value {i} mismatch' python_params = dict(policy.named_parameters()) for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): @@ -870,7 +894,7 @@ def check(env_name): v_python = python_params[k].data print(k, v.view(-1)[0]) - assert 
torch.allclose(v, v_python), k + assert torch.allclose(v, v_python, atol=1e-5) print('Check passed') diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index 8a771204e..158362c61 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -138,6 +138,10 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): betas=(config['adam_beta1'], config['adam_beta2']), eps=config['adam_eps'], ) + optimizer = torch.optim.SGD( + self.policy.parameters(), + lr=config['learning_rate'], + ) elif config['optimizer'] == 'muon': from heavyball import ForeachMuon warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') @@ -321,6 +325,7 @@ def train(self): self.ratio[:] = 1 num_minibatches = config['num_minibatches'] + #num_minibatches = 1 for mb in range(num_minibatches): profile('train_misc', epoch, nest=True) self.amp_context.__enter__() @@ -331,6 +336,8 @@ def train(self): self.terminals, self.ratio, advantages, config['gamma'], config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) + #print("Adv Py", advantages.mean().item()) + profile('train_copy', epoch) adv = advantages.abs().sum(axis=1) prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) @@ -338,6 +345,9 @@ def train(self): idx = torch.multinomial(prio_probs, self.minibatch_segments, replacement=True) mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta + + #print("Prio Py", mb_prio.mean().item()) + mb_obs = self.observations[idx] mb_actions = self.actions[idx] mb_logprobs = self.logprobs[idx] @@ -349,6 +359,17 @@ def train(self): mb_returns = advantages[idx] + mb_values mb_advantages = advantages[idx] + #print("mb_obs Py", mb_obs.sum()) + #print("mb_actions Py", mb_actions.sum()) + #print("mb_logprobs Py", mb_logprobs.min()) + #print("mb_rewards Py", mb_rewards.min()) + #print("mb_terminals Py", mb_terminals.min()) + #print("mb_truncations Py", mb_truncations.min()) + #print("mb_ratio Py", mb_ratio.min()) + 
#print("mb_values Py", mb_values.min()) + #print("mb_returns Py", mb_returns.min()) + #print("mb_advantages Py", mb_advantages.min()) + profile('train_forward', epoch) if not config['use_rnn']: mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) @@ -360,6 +381,7 @@ def train(self): ) logits, newvalue = self.policy(mb_obs, state) + #print("logits Py", logits.min().item()) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) profile('train_misc', epoch) @@ -368,6 +390,11 @@ def train(self): ratio = logratio.exp() self.ratio[idx] = ratio.detach() + #print("newlogprob Py", newlogprob.min().item()) + #print("entropy Py", entropy.min()) + #print(f"logratio_new Py {logratio.min().item():.16f}") + #print(f"ratio Py {ratio.min().item():.16f}") + with torch.no_grad(): old_approx_kl = (-logratio).mean() approx_kl = ((ratio - 1) - logratio).mean() @@ -380,20 +407,27 @@ def train(self): adv = mb_advantages adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) + #print("new advantage", adv.min().item()) + # Losses pg_loss1 = -adv * ratio pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) pg_loss = torch.max(pg_loss1, pg_loss2).mean() + #print("pg_loss Py", pg_loss.item()) + newvalue = newvalue.view(mb_returns.shape) v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) v_loss_unclipped = (newvalue - mb_returns) ** 2 v_loss_clipped = (v_clipped - mb_returns) ** 2 v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() + #print("v_loss Py", v_loss.item()) + entropy_loss = entropy.mean() loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss + #print("loss Py", loss.item()) self.amp_context.__enter__() # TODO: AMP needs some debugging # This breaks vloss clipping? 
@@ -414,6 +448,12 @@ def train(self): loss.backward() if (mb + 1) % self.accumulate_minibatches == 0: torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) + # Print grads + #for param in self.policy.parameters(): + # print(param.grad.abs().sum()) + + # Print current lr + #print(f'Current lr: {self.optimizer.param_groups[0]["lr"]}') self.optimizer.step() self.optimizer.zero_grad() From d38cec676b915719e386c4e8a6dc18840bfe52ba Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 30 Oct 2025 23:22:47 +0000 Subject: [PATCH 085/188] fixed the bug! Was the lstm shared init for cpp --- pufferlib/extensions/pufferlib.cpp | 57 ++++++++++++++++++++++-------- pufferlib/models.py | 2 -- pufferlib/pufferl.py | 11 ++++++ pufferlib/python_pufferl.py | 54 ++++++++++++++-------------- 4 files changed, 81 insertions(+), 43 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index f3431d304..ba695eb5a 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -115,6 +115,22 @@ void set_policy(torch::Tensor serialized_policy) { g_policy.eval(); } */ +struct ShareableLSTMCell : public torch::nn::LSTMCellImpl { + ShareableLSTMCell(const torch::nn::LSTMCellOptions& options) : torch::nn::LSTMCellImpl(options) {} + + void set_shared_weights(torch::Tensor w_ih, torch::Tensor w_hh, torch::Tensor b_ih, torch::Tensor b_hh) { + weight_ih = w_ih; + weight_hh = w_hh; + bias_ih = b_ih; + bias_hh = b_hh; + + // Remove the original (unused) tensors from the parameter dict to avoid waste + parameters_.erase("weight_ih"); + parameters_.erase("weight_hh"); + parameters_.erase("bias_ih"); + parameters_.erase("bias_hh"); + } +}; class PolicyLSTM : public torch::nn::Module { private: @@ -125,7 +141,8 @@ class PolicyLSTM : public torch::nn::Module { torch::nn::Linear decoder{nullptr}; torch::nn::Linear value{nullptr}; torch::nn::LSTM lstm{nullptr}; - torch::nn::LSTMCell cell{nullptr}; + 
//torch::nn::LSTMCell cell{nullptr}; + std::shared_ptr cell{nullptr}; public: // Constructor: input_size instead of grid_size @@ -158,13 +175,23 @@ class PolicyLSTM : public torch::nn::Module { lstm->named_parameters()["bias_ih_l0"].data().zero_(); lstm->named_parameters()["bias_hh_l0"].data().zero_(); + // ... (your existing lstm creation and init) + + cell = register_module("cell", std::make_shared(torch::nn::LSTMCellOptions(hidden_size_, hidden_size_))); + cell->set_shared_weights(lstm->named_parameters()["weight_ih_l0"], + lstm->named_parameters()["weight_hh_l0"], + lstm->named_parameters()["bias_ih_l0"], + lstm->named_parameters()["bias_hh_l0"]); + /* // Share weights between LSTM and LSTMCell. Do not register or you'll double-update during optim. - cell = torch::nn::LSTMCell(hidden_size_, hidden_size_); + //cell = torch::nn::LSTMCell(hidden_size_, hidden_size_); + cell = register_module("cell", torch::nn::LSTMCell(hidden_size_, hidden_size_)); cell->named_parameters()["weight_ih"].data() = lstm->named_parameters()["weight_ih_l0"].data(); cell->named_parameters()["weight_hh"].data() = lstm->named_parameters()["weight_hh_l0"].data(); cell->named_parameters()["bias_ih"].data() = lstm->named_parameters()["bias_ih_l0"].data(); cell->named_parameters()["bias_hh"].data() = lstm->named_parameters()["bias_hh_l0"].data(); //cell->to(torch::kCUDA); + */ } // Forward for evaluation/inference (uses LSTMCell) @@ -278,8 +305,8 @@ void sync_fp16_fp32(pufferlib::PolicyLSTM* policy_16, pufferlib::PolicyLSTM* pol typedef struct { PolicyLSTM* policy_16; PolicyLSTM* policy_32; - //torch::optim::Adam* optimizer; - torch::optim::SGD* optimizer; + torch::optim::Adam* optimizer; + //torch::optim::SGD* optimizer; double lr; int64_t max_epochs; } PuffeRL; @@ -312,8 +339,8 @@ std::unique_ptr create_pufferl(int64_t input_size, auto policy_32 = new PolicyLSTM(input_size, num_atns, hidden_size); //policy_32->to(torch::kCUDA); - //auto optimizer = new 
torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); - auto optimizer = new torch::optim::SGD(policy_32->parameters(), torch::optim::SGDOptions(lr)); + auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); + //auto optimizer = new torch::optim::SGD(policy_32->parameters(), torch::optim::SGDOptions(lr)); auto pufferl = std::make_unique(); pufferl->policy_16 = policy_16; @@ -370,6 +397,7 @@ std::tuple compiled_evaluate( { pybind11::gil_scoped_release no_gil; step_environments_cuda(envs_tensor, indices_tensor); + torch::cuda::synchronize(); } rewards.clamp_(-1.0f, 1.0f); } @@ -508,14 +536,12 @@ pybind11::dict compiled_train( torch::Tensor mb_advantages = advantages.index_select(0, idx); torch::Tensor mb_returns = mb_advantages + mb_values; - /* - std::cout << "mb_obs: " << mb_obs.sum() << std::endl; - std::cout << "mb_actions: " << mb_actions.sum() << std::endl; - std::cout << "mb_logprobs: " << mb_logprobs.min() << std::endl; - std::cout << "mb_values: " << mb_values.min() << std::endl; - std::cout << "mb_advantages: " << mb_advantages.min() << std::endl; - std::cout << "mb_returns: " << mb_returns.min() << std::endl; - */ + //std::cout << "mb_obs: " << mb_obs.sum() << std::endl; + //std::cout << "mb_actions: " << mb_actions.sum() << std::endl; + //std::cout << "mb_logprobs: " << mb_logprobs.min() << std::endl; + //std::cout << "mb_values: " << mb_values.min() << std::endl; + //std::cout << "mb_advantages: " << mb_advantages.min() << std::endl; + //std::cout << "mb_returns: " << mb_returns.min() << std::endl; // Reshape obs if not using RNN if (!use_rnn) { @@ -561,7 +587,6 @@ pybind11::dict compiled_train( // Update global ratio and values in-place (matches Python) ratio.index_copy_(0, idx, ratio_new.detach().squeeze(-1).to(torch::kFloat32)); - values.index_copy_(0, idx, newvalue.detach().squeeze(-1).to(torch::kFloat32)); // Normalize 
advantages: (adv - mean) / std, then weight auto adv_normalized = mb_advantages; @@ -583,6 +608,8 @@ pybind11::dict compiled_train( auto v_loss_clipped = (v_clipped - mb_returns).pow(2); auto v_loss = 0.5 * torch::max(v_loss_unclipped, v_loss_clipped).mean(); + values.index_copy_(0, idx, newvalue.detach().squeeze(-1).to(torch::kFloat32)); + //std::cout << "v_loss: " << v_loss << std::endl; // Entropy diff --git a/pufferlib/models.py b/pufferlib/models.py index 3eebc86ee..7c7254577 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -163,7 +163,6 @@ def forward(self, observations, state): hidden = hidden.transpose(0, 1) #hidden = self.pre_layernorm(hidden) hidden, (lstm_h, lstm_c) = self.lstm.forward(hidden, lstm_state) - hidden = hidden.float() #hidden = self.post_layernorm(hidden) hidden = hidden.transpose(0, 1) @@ -267,7 +266,6 @@ def forward(self, observations, state): hidden = hidden.transpose(0, 1) #hidden = self.pre_layernorm(hidden) hidden, (lstm_h, lstm_c) = self.lstm.forward(hidden, lstm_state) - hidden = hidden.float() #hidden = self.post_layernorm(hidden) hidden = hidden.transpose(0, 1) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index c4a83ef64..64410c2c6 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -866,11 +866,14 @@ def check(env_name): torch.manual_seed(args['train']['seed']) pufferl_python.evaluate() pufferl_python.train() + pufferl_python.evaluate() torch.manual_seed(args['train']['seed']) pufferl_cpp.evaluate() pufferl_cpp.train() + pufferl_cpp.evaluate() + ''' for i in range(args['train']['bptt_horizon']): assert torch.allclose(pufferl_python.observations[:, i].float(), pufferl_cpp.observations[:, i]), f'Observation {i} mismatch' assert torch.allclose(pufferl_python.actions[:, i], pufferl_cpp.actions[:, i]), f'Action {i} mismatch' @@ -878,11 +881,19 @@ def check(env_name): assert torch.allclose(pufferl_python.terminals[:, i], pufferl_cpp.terminals[:, i]), f'Terminal {i} mismatch' assert 
torch.allclose(pufferl_python.logprobs[:, i], pufferl_cpp.logprobs[:, i]), f'Logprob {i} mismatch' assert torch.allclose(pufferl_python.values[:, i], pufferl_cpp.values[:, i], atol=1e-5), f'Value {i} mismatch' + ''' + pol = pufferl_cpp.pufferl_cpp.policy_32 + pol = pufferl_cpp.pufferl_cpp.policy_32.named_parameters() + cpp_lstm = pol['lstm.weight_ih_l0'] + cpp_lstm_cell = pol['cell.weight_ih'] + assert torch.allclose(policy.lstm.weight_ih_l0, policy.cell.weight_ih) + assert torch.allclose(cpp_lstm, cpp_lstm_cell) python_params = dict(policy.named_parameters()) for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): # For some reason, cpp records twice if k == 'cell.weight_ih': + breakpoint() v_python = python_params['lstm.weight_ih_l0'].data elif k == 'cell.weight_hh': v_python = python_params['lstm.weight_hh_l0'].data diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index 158362c61..ef8927808 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -138,10 +138,12 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): betas=(config['adam_beta1'], config['adam_beta2']), eps=config['adam_eps'], ) + ''' optimizer = torch.optim.SGD( self.policy.parameters(), lr=config['learning_rate'], ) + ''' elif config['optimizer'] == 'muon': from heavyball import ForeachMuon warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') @@ -336,7 +338,7 @@ def train(self): self.terminals, self.ratio, advantages, config['gamma'], config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) - #print("Adv Py", advantages.mean().item()) + print("Adv Py", advantages.mean().item()) profile('train_copy', epoch) adv = advantages.abs().sum(axis=1) @@ -346,7 +348,7 @@ def train(self): self.minibatch_segments, replacement=True) mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta - #print("Prio Py", mb_prio.mean().item()) + print("Prio Py", mb_prio.mean().item()) mb_obs = 
self.observations[idx] mb_actions = self.actions[idx] @@ -359,16 +361,16 @@ def train(self): mb_returns = advantages[idx] + mb_values mb_advantages = advantages[idx] - #print("mb_obs Py", mb_obs.sum()) - #print("mb_actions Py", mb_actions.sum()) - #print("mb_logprobs Py", mb_logprobs.min()) - #print("mb_rewards Py", mb_rewards.min()) - #print("mb_terminals Py", mb_terminals.min()) - #print("mb_truncations Py", mb_truncations.min()) - #print("mb_ratio Py", mb_ratio.min()) - #print("mb_values Py", mb_values.min()) - #print("mb_returns Py", mb_returns.min()) - #print("mb_advantages Py", mb_advantages.min()) + print("mb_obs Py", mb_obs.sum()) + print("mb_actions Py", mb_actions.sum()) + print("mb_logprobs Py", mb_logprobs.min()) + print("mb_rewards Py", mb_rewards.min()) + print("mb_terminals Py", mb_terminals.min()) + print("mb_truncations Py", mb_truncations.min()) + print("mb_ratio Py", mb_ratio.min()) + print("mb_values Py", mb_values.min()) + print("mb_returns Py", mb_returns.min()) + print("mb_advantages Py", mb_advantages.min()) profile('train_forward', epoch) if not config['use_rnn']: @@ -381,7 +383,7 @@ def train(self): ) logits, newvalue = self.policy(mb_obs, state) - #print("logits Py", logits.min().item()) + print("logits Py", logits.min().item()) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) profile('train_misc', epoch) @@ -390,31 +392,31 @@ def train(self): ratio = logratio.exp() self.ratio[idx] = ratio.detach() - #print("newlogprob Py", newlogprob.min().item()) - #print("entropy Py", entropy.min()) - #print(f"logratio_new Py {logratio.min().item():.16f}") - #print(f"ratio Py {ratio.min().item():.16f}") + print("newlogprob Py", newlogprob.min().item()) + print("entropy Py", entropy.min()) + print(f"logratio_new Py {logratio.min().item():.16f}") + print(f"ratio Py {ratio.min().item():.16f}") with torch.no_grad(): old_approx_kl = (-logratio).mean() approx_kl = ((ratio - 1) - logratio).mean() clipfrac = ((ratio - 
1.0).abs() > config['clip_coef']).float().mean() - adv = advantages[idx] - adv = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, - ratio, adv, config['gamma'], config['gae_lambda'], - config['vtrace_rho_clip'], config['vtrace_c_clip']) + #adv = advantages[idx] + #adv = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, + # ratio, adv, config['gamma'], config['gae_lambda'], + # config['vtrace_rho_clip'], config['vtrace_c_clip']) adv = mb_advantages adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) - #print("new advantage", adv.min().item()) + print("new advantage", adv.min().item()) # Losses pg_loss1 = -adv * ratio pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) pg_loss = torch.max(pg_loss1, pg_loss2).mean() - #print("pg_loss Py", pg_loss.item()) + print("pg_loss Py", pg_loss.item()) newvalue = newvalue.view(mb_returns.shape) v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) @@ -422,12 +424,12 @@ def train(self): v_loss_clipped = (v_clipped - mb_returns) ** 2 v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() - #print("v_loss Py", v_loss.item()) + print("v_loss Py", v_loss.item()) entropy_loss = entropy.mean() loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss - #print("loss Py", loss.item()) + print("loss Py", loss.item()) self.amp_context.__enter__() # TODO: AMP needs some debugging # This breaks vloss clipping? 
@@ -453,7 +455,7 @@ def train(self): # print(param.grad.abs().sum()) # Print current lr - #print(f'Current lr: {self.optimizer.param_groups[0]["lr"]}') + print(f'Current lr: {self.optimizer.param_groups[0]["lr"]}') self.optimizer.step() self.optimizer.zero_grad() From e94ea3520c275e6cd1e36e2bdd01572b1fbe6eec Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 31 Oct 2025 16:08:21 +0000 Subject: [PATCH 086/188] CPU check pass --- pufferlib/pufferl.py | 48 +++++++------------------------------ pufferlib/python_pufferl.py | 26 ++++++++++---------- 2 files changed, 20 insertions(+), 54 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 64410c2c6..a55e59487 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -831,12 +831,13 @@ def download(self): return f'{data_dir}/{model_file}' def check(env_name): - args = load_config(env_name) - vecenv = load_env(env_name, args) + torch.set_printoptions(precision=16) + args = load_config(env_name) args['train']['optimizer'] = 'adam' - torch.set_printoptions(precision=16) + vecenv = load_env(env_name, args) + torch.manual_seed(args['train']['seed']) policy = load_policy(args, vecenv, env_name) @@ -845,23 +846,11 @@ def check(env_name): pufferl_python = pufferlib.python_pufferl.PuffeRL(train_config, vecenv, policy, verbose=False) pufferl_cpp = PuffeRL(train_config, verbose=False) + python_params = dict(policy.named_parameters()) for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): - # For some reason, cpp records twice - if k == 'cell.weight_ih': - v_python = python_params['lstm.weight_ih_l0'].data - elif k == 'cell.weight_hh': - v_python = python_params['lstm.weight_hh_l0'].data - elif k == 'cell.bias_ih': - v_python = python_params['lstm.bias_ih_l0'].data - elif k == 'cell.bias_hh': - v_python = python_params['lstm.bias_hh_l0'].data - else: - v_python = python_params[k].data - - print(k, v.view(-1)[0]) - assert torch.allclose(v, v_python), k - + v_python = python_params[k].data + 
assert torch.allclose(v, v_python) torch.manual_seed(args['train']['seed']) pufferl_python.evaluate() @@ -873,7 +862,6 @@ def check(env_name): pufferl_cpp.train() pufferl_cpp.evaluate() - ''' for i in range(args['train']['bptt_horizon']): assert torch.allclose(pufferl_python.observations[:, i].float(), pufferl_cpp.observations[:, i]), f'Observation {i} mismatch' assert torch.allclose(pufferl_python.actions[:, i], pufferl_cpp.actions[:, i]), f'Action {i} mismatch' @@ -881,30 +869,10 @@ def check(env_name): assert torch.allclose(pufferl_python.terminals[:, i], pufferl_cpp.terminals[:, i]), f'Terminal {i} mismatch' assert torch.allclose(pufferl_python.logprobs[:, i], pufferl_cpp.logprobs[:, i]), f'Logprob {i} mismatch' assert torch.allclose(pufferl_python.values[:, i], pufferl_cpp.values[:, i], atol=1e-5), f'Value {i} mismatch' - ''' - pol = pufferl_cpp.pufferl_cpp.policy_32 - pol = pufferl_cpp.pufferl_cpp.policy_32.named_parameters() - cpp_lstm = pol['lstm.weight_ih_l0'] - cpp_lstm_cell = pol['cell.weight_ih'] - assert torch.allclose(policy.lstm.weight_ih_l0, policy.cell.weight_ih) - assert torch.allclose(cpp_lstm, cpp_lstm_cell) python_params = dict(policy.named_parameters()) for k, v in pufferl_cpp.pufferl_cpp.policy_32.named_parameters(): - # For some reason, cpp records twice - if k == 'cell.weight_ih': - breakpoint() - v_python = python_params['lstm.weight_ih_l0'].data - elif k == 'cell.weight_hh': - v_python = python_params['lstm.weight_hh_l0'].data - elif k == 'cell.bias_ih': - v_python = python_params['lstm.bias_ih_l0'].data - elif k == 'cell.bias_hh': - v_python = python_params['lstm.bias_hh_l0'].data - else: - v_python = python_params[k].data - - print(k, v.view(-1)[0]) + v_python = python_params[k].data assert torch.allclose(v, v_python, atol=1e-5) print('Check passed') diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index ef8927808..830f60262 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -138,12 
+138,6 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): betas=(config['adam_beta1'], config['adam_beta2']), eps=config['adam_eps'], ) - ''' - optimizer = torch.optim.SGD( - self.policy.parameters(), - lr=config['learning_rate'], - ) - ''' elif config['optimizer'] == 'muon': from heavyball import ForeachMuon warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') @@ -338,7 +332,7 @@ def train(self): self.terminals, self.ratio, advantages, config['gamma'], config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) - print("Adv Py", advantages.mean().item()) + #print("Adv Py", advantages.mean().item()) profile('train_copy', epoch) adv = advantages.abs().sum(axis=1) @@ -348,7 +342,7 @@ def train(self): self.minibatch_segments, replacement=True) mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta - print("Prio Py", mb_prio.mean().item()) + #print("Prio Py", mb_prio.mean().item()) mb_obs = self.observations[idx] mb_actions = self.actions[idx] @@ -361,6 +355,7 @@ def train(self): mb_returns = advantages[idx] + mb_values mb_advantages = advantages[idx] + ''' print("mb_obs Py", mb_obs.sum()) print("mb_actions Py", mb_actions.sum()) print("mb_logprobs Py", mb_logprobs.min()) @@ -371,6 +366,7 @@ def train(self): print("mb_values Py", mb_values.min()) print("mb_returns Py", mb_returns.min()) print("mb_advantages Py", mb_advantages.min()) + ''' profile('train_forward', epoch) if not config['use_rnn']: @@ -383,7 +379,7 @@ def train(self): ) logits, newvalue = self.policy(mb_obs, state) - print("logits Py", logits.min().item()) + #print("logits Py", logits.min().item()) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) profile('train_misc', epoch) @@ -392,10 +388,12 @@ def train(self): ratio = logratio.exp() self.ratio[idx] = ratio.detach() + ''' print("newlogprob Py", newlogprob.min().item()) print("entropy Py", entropy.min()) print(f"logratio_new Py 
{logratio.min().item():.16f}") print(f"ratio Py {ratio.min().item():.16f}") + ''' with torch.no_grad(): old_approx_kl = (-logratio).mean() @@ -409,14 +407,14 @@ def train(self): adv = mb_advantages adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) - print("new advantage", adv.min().item()) + #print("new advantage", adv.min().item()) # Losses pg_loss1 = -adv * ratio pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) pg_loss = torch.max(pg_loss1, pg_loss2).mean() - print("pg_loss Py", pg_loss.item()) + #print("pg_loss Py", pg_loss.item()) newvalue = newvalue.view(mb_returns.shape) v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) @@ -424,12 +422,12 @@ def train(self): v_loss_clipped = (v_clipped - mb_returns) ** 2 v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() - print("v_loss Py", v_loss.item()) + #print("v_loss Py", v_loss.item()) entropy_loss = entropy.mean() loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss - print("loss Py", loss.item()) + #print("loss Py", loss.item()) self.amp_context.__enter__() # TODO: AMP needs some debugging # This breaks vloss clipping? @@ -455,7 +453,7 @@ def train(self): # print(param.grad.abs().sum()) # Print current lr - print(f'Current lr: {self.optimizer.param_groups[0]["lr"]}') + #print(f'Current lr: {self.optimizer.param_groups[0]["lr"]}') self.optimizer.step() self.optimizer.zero_grad() From 9c4fef7e2e2c01f6e77fb2b298340e4d058bfef6 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 31 Oct 2025 16:28:56 +0000 Subject: [PATCH 087/188] Enable gpu. 
Check fails, trains, 4msps) --- pufferlib/extensions/pufferlib.cpp | 28 ++++++++++++++-------------- pufferlib/pufferl.py | 3 ++- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index ba695eb5a..c0b00473f 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -333,11 +333,11 @@ std::unique_ptr create_pufferl(int64_t input_size, torch::manual_seed(42); auto policy_16 = new PolicyLSTM(input_size, num_atns, hidden_size); - //policy_16->to(torch::kCUDA); + policy_16->to(torch::kCUDA); policy_16->to(torch::kFloat32); auto policy_32 = new PolicyLSTM(input_size, num_atns, hidden_size); - //policy_32->to(torch::kCUDA); + policy_32->to(torch::kCUDA); auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); //auto optimizer = new torch::optim::SGD(policy_32->parameters(), torch::optim::SGDOptions(lr)); @@ -377,7 +377,7 @@ std::tuple compiled_evaluate( torch::NoGradGuard no_grad; for (int64_t i = 0; i < horizon; ++i) { - auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32).to(torch::kCPU), lstm_h, lstm_c); + auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32), lstm_h, lstm_c); lstm_h = lstm_h_out; lstm_c = lstm_c_out; @@ -386,18 +386,18 @@ std::tuple compiled_evaluate( auto logprob = logprobs.gather(1, action.unsqueeze(1)).squeeze(1); // Store - obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32).to(torch::kCPU)); - act_buffer.select(1, i).copy_(action.to(torch::kInt32).to(torch::kCPU)); - logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32).to(torch::kCPU)); - rew_buffer.select(1, i).copy_(rewards.to(torch::kFloat32).to(torch::kCPU)); - term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32).to(torch::kCPU)); - val_buffer.select(1, i).copy_(value.flatten().to(torch::kFloat32).to(torch::kCPU)); - - 
actions.copy_(action.to(torch::kCUDA)); + obs_buffer.select(1, i).copy_(obs.to(torch::kFloat32)); + act_buffer.select(1, i).copy_(action.to(torch::kInt32)); + logprob_buffer.select(1, i).copy_(logprob.to(torch::kFloat32)); + rew_buffer.select(1, i).copy_(rewards.to(torch::kFloat32)); + term_buffer.select(1, i).copy_(terminals.to(torch::kFloat32)); + val_buffer.select(1, i).copy_(value.flatten().to(torch::kFloat32)); + + actions.copy_(action); { pybind11::gil_scoped_release no_gil; step_environments_cuda(envs_tensor, indices_tensor); - torch::cuda::synchronize(); + //torch::cuda::synchronize(); } rewards.clamp_(-1.0f, 1.0f); } @@ -512,7 +512,7 @@ pybind11::dict compiled_train( for (int64_t mb = 0; mb < total_minibatches; ++mb) { //for (int64_t mb = 0; mb < 1; ++mb) { advantages = torch::zeros_like(values); - compute_puff_advantage_cpu( + compute_puff_advantage_cuda( values, rewards, terminals, ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip @@ -558,7 +558,7 @@ pybind11::dict compiled_train( torch::Tensor mb_lstm_c = torch::zeros_like(mb_lstm_h); // Forward pass - auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32).to(torch::kCPU), mb_lstm_h, mb_lstm_c); + auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32), mb_lstm_h, mb_lstm_c); //std::cout << "logits: " << logits.mean() << std::endl; diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index a55e59487..accde9034 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -867,7 +867,8 @@ def check(env_name): assert torch.allclose(pufferl_python.actions[:, i], pufferl_cpp.actions[:, i]), f'Action {i} mismatch' assert torch.allclose(pufferl_python.rewards[:, i], pufferl_cpp.rewards[:, i]), f'Reward {i} mismatch' assert torch.allclose(pufferl_python.terminals[:, i], pufferl_cpp.terminals[:, i]), f'Terminal {i} mismatch' - assert torch.allclose(pufferl_python.logprobs[:, i], pufferl_cpp.logprobs[:, i]), f'Logprob {i} mismatch' + 
breakpoint() + assert torch.allclose(pufferl_python.logprobs[:, i], pufferl_cpp.logprobs[:, i], atol=1e-5), f'Logprob {i} mismatch' assert torch.allclose(pufferl_python.values[:, i], pufferl_cpp.values[:, i], atol=1e-5), f'Value {i} mismatch' python_params = dict(policy.named_parameters()) From 6ca97e0afef6a1e9b4041a8811b2b2b7b12bedca Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 31 Oct 2025 17:45:59 +0000 Subject: [PATCH 088/188] bf16. Slower for now because missing kernel. May need to do 32b accum --- pufferlib/extensions/cuda/squared_torch.cu | 2 - pufferlib/extensions/pufferlib.cpp | 149 +++++++-------------- pufferlib/ocean/squared/squared.h | 2 - pufferlib/pufferl.py | 8 +- pufferlib/python_pufferl.py | 52 +------ 5 files changed, 57 insertions(+), 156 deletions(-) diff --git a/pufferlib/extensions/cuda/squared_torch.cu b/pufferlib/extensions/cuda/squared_torch.cu index cd3673a33..8f137a30b 100644 --- a/pufferlib/extensions/cuda/squared_torch.cu +++ b/pufferlib/extensions/cuda/squared_torch.cu @@ -59,11 +59,9 @@ __device__ void cuda_reset(Squared* env, curandState* rng) { env->c = env->size/2; env->tick = 0; int target_idx = 0; // Deterministic for testing - /* do { target_idx = curand(rng) % tiles; } while (target_idx == tiles/2); - */ env->observations[target_idx] = TARGET; } diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index c0b00473f..65f8fca73 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -148,11 +148,6 @@ class PolicyLSTM : public torch::nn::Module { // Constructor: input_size instead of grid_size PolicyLSTM(int64_t input_size, int64_t num_atns, int64_t hidden_size = 128) : input_size_(input_size), hidden_size_(hidden_size), num_atns_(num_atns) { - - torch::globalContext().setDeterministicCuDNN(true); - torch::globalContext().setBenchmarkCuDNN(false); - torch::manual_seed(42); - torch::cuda::manual_seed(42); encoder = register_module("encoder", 
torch::nn::Sequential( torch::nn::Linear(input_size_, hidden_size_), torch::nn::GELU() @@ -303,10 +298,8 @@ void sync_fp16_fp32(pufferlib::PolicyLSTM* policy_16, pufferlib::PolicyLSTM* pol } typedef struct { - PolicyLSTM* policy_16; - PolicyLSTM* policy_32; + PolicyLSTM* policy; torch::optim::Adam* optimizer; - //torch::optim::SGD* optimizer; double lr; int64_t max_epochs; } PuffeRL; @@ -314,37 +307,33 @@ typedef struct { std::unique_ptr create_pufferl(int64_t input_size, int64_t num_atns, int64_t hidden_size, double lr, double beta1, double beta2, double eps, int64_t max_epochs) { + // Seeding + torch::manual_seed(42); + torch::cuda::manual_seed(42); + // Enable cuDNN benchmarking - //torch::globalContext().setBenchmarkCuDNN(true); - //torch::globalContext().setDeterministicCuDNN(false); - //torch::globalContext().setBenchmarkLimitCuDNN(32); + torch::globalContext().setBenchmarkCuDNN(true); + torch::globalContext().setDeterministicCuDNN(false); + torch::globalContext().setBenchmarkLimitCuDNN(32); // Enable TF32 for faster FP32 math (uses Tensor Cores on 4090) - //torch::globalContext().setAllowTF32CuBLAS(true); - //torch::globalContext().setAllowTF32CuDNN(true); + torch::globalContext().setAllowTF32CuBLAS(true); + torch::globalContext().setAllowTF32CuDNN(true); // Enable faster FP16 reductions - //torch::globalContext().setAllowFP16ReductionCuBLAS(true); + torch::globalContext().setAllowFP16ReductionCuBLAS(true); // BF16 reduction (if using bfloat16) - //torch::globalContext().setAllowBF16ReductionCuBLAS(true); - - // Random seed - torch::manual_seed(42); - - auto policy_16 = new PolicyLSTM(input_size, num_atns, hidden_size); - policy_16->to(torch::kCUDA); - policy_16->to(torch::kFloat32); + torch::globalContext().setAllowBF16ReductionCuBLAS(true); - auto policy_32 = new PolicyLSTM(input_size, num_atns, hidden_size); - policy_32->to(torch::kCUDA); + auto policy = new PolicyLSTM(input_size, num_atns, hidden_size); + policy->to(torch::kCUDA); + 
policy->to(torch::kBFloat16); - auto optimizer = new torch::optim::Adam(policy_32->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); - //auto optimizer = new torch::optim::SGD(policy_32->parameters(), torch::optim::SGDOptions(lr)); + auto optimizer = new torch::optim::Adam(policy->parameters(), torch::optim::AdamOptions(lr).betas({beta1, beta2}).eps(eps)); auto pufferl = std::make_unique(); - pufferl->policy_16 = policy_16; - pufferl->policy_32 = policy_32; + pufferl->policy = policy; pufferl->optimizer = optimizer; pufferl->lr = lr; pufferl->max_epochs = max_epochs; @@ -371,13 +360,16 @@ std::tuple compiled_evaluate( int64_t horizon, int64_t num_envs ) { + torch::NoGradGuard no_grad; + auto& pufferl = pufferl_obj.cast(); - auto& policy = pufferl.policy_32; + auto& policy = pufferl.policy; - torch::NoGradGuard no_grad; + lstm_h = lstm_h.to(torch::kBFloat16); + lstm_c = lstm_c.to(torch::kBFloat16); for (int64_t i = 0; i < horizon; ++i) { - auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kFloat32), lstm_h, lstm_c); + auto [logits, value, lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kBFloat16), lstm_h, lstm_c); lstm_h = lstm_h_out; lstm_c = lstm_c_out; @@ -397,7 +389,6 @@ std::tuple compiled_evaluate( { pybind11::gil_scoped_release no_gil; step_environments_cuda(envs_tensor, indices_tensor); - //torch::cuda::synchronize(); } rewards.clamp_(-1.0f, 1.0f); } @@ -424,7 +415,7 @@ std::tuple evaluate_step( int64_t i ) { auto& pufferl = pufferl_obj.cast(); - auto& policy = pufferl.policy_32; + auto& policy = pufferl.policy; torch::NoGradGuard no_grad; @@ -480,7 +471,7 @@ pybind11::dict compiled_train( int64_t current_epoch ) { auto& pufferl = pufferl_obj.cast(); - auto& policy_32 = pufferl.policy_32; + auto& policy = pufferl.policy; auto& optimizer = pufferl.optimizer; auto device = values.device(); @@ -510,14 +501,13 @@ pybind11::dict compiled_train( auto advantages = torch::zeros_like(values); for (int64_t 
mb = 0; mb < total_minibatches; ++mb) { - //for (int64_t mb = 0; mb < 1; ++mb) { advantages = torch::zeros_like(values); compute_puff_advantage_cuda( values, rewards, terminals, ratio, advantages, gamma, gae_lambda, vtrace_rho_clip, vtrace_c_clip ); - //std::cout << "Adv: " << advantages.mean().item() << std::endl; + // Prioritization auto adv = advantages.abs().sum(1); // [num_envs] @@ -526,8 +516,6 @@ pybind11::dict compiled_train( auto idx = at::multinomial(prio_probs, minibatch_segments, true); auto mb_prio = torch::pow(segments*prio_probs.index_select(0, idx).unsqueeze(1), -anneal_beta); - //std::cout << "Prio: " << mb_prio.mean().item() << std::endl; - // Index into data torch::Tensor mb_obs = observations.index_select(0, idx); torch::Tensor mb_actions = actions.index_select(0, idx); @@ -536,13 +524,6 @@ pybind11::dict compiled_train( torch::Tensor mb_advantages = advantages.index_select(0, idx); torch::Tensor mb_returns = mb_advantages + mb_values; - //std::cout << "mb_obs: " << mb_obs.sum() << std::endl; - //std::cout << "mb_actions: " << mb_actions.sum() << std::endl; - //std::cout << "mb_logprobs: " << mb_logprobs.min() << std::endl; - //std::cout << "mb_values: " << mb_values.min() << std::endl; - //std::cout << "mb_advantages: " << mb_advantages.min() << std::endl; - //std::cout << "mb_returns: " << mb_returns.min() << std::endl; - // Reshape obs if not using RNN if (!use_rnn) { auto flat_shape = std::vector{-1, mb_obs.size(2), mb_obs.size(3)}; @@ -553,14 +534,12 @@ pybind11::dict compiled_train( // Initial LSTM states (zero or none) torch::Tensor mb_lstm_h = torch::zeros( {1, minibatch_segments, 128}, - torch::kFloat32 + torch::kBFloat16 ).to(device); torch::Tensor mb_lstm_c = torch::zeros_like(mb_lstm_h); // Forward pass - auto [logits, newvalue] = policy_32->forward_train(mb_obs.to(torch::kFloat32), mb_lstm_h, mb_lstm_c); - - //std::cout << "logits: " << logits.mean() << std::endl; + auto [logits, newvalue] = 
policy->forward_train(mb_obs.to(torch::kBFloat16), mb_lstm_h, mb_lstm_c); // Flatten for action lookup auto flat_logits = logits.reshape({-1, logits.size(-1)}); @@ -571,20 +550,12 @@ pybind11::dict compiled_train( // Gather logprobs for taken actions auto newlogprob_flat = logprobs_new.gather(1, flat_actions.unsqueeze(1)).squeeze(1); auto newlogprob = newlogprob_flat.reshape({minibatch_segments, horizon}); - auto entropy = - (probs_new * logprobs_new).sum(1); - - //std::cout << "newlogprob: " << newlogprob.min() << std::endl; - //std::cout << "entropy: " << entropy.min() << std::endl; - - entropy = entropy.mean(); + auto entropy = - (probs_new * logprobs_new).sum(1).mean(); // Compute ratio auto logratio = newlogprob - mb_logprobs; auto ratio_new = logratio.exp(); - //std::cout << "logratio_new: " << std::fixed << std::setprecision(20) << logratio.min().item() << std::endl; - //std::cout << "ratio_new: " << std::fixed << std::setprecision(20) << ratio_new.min().item() << std::endl; - // Update global ratio and values in-place (matches Python) ratio.index_copy_(0, idx, ratio_new.detach().squeeze(-1).to(torch::kFloat32)); @@ -592,15 +563,11 @@ pybind11::dict compiled_train( auto adv_normalized = mb_advantages; adv_normalized = mb_prio * (adv_normalized - adv_normalized.mean()) / (adv_normalized.std() + 1e-8); - //std::cout << "adv_normalized: " << std::fixed << std::setprecision(20) << adv_normalized.min().item() << std::endl; - // Policy loss auto pg_loss1 = -adv_normalized * ratio_new; auto pg_loss2 = -adv_normalized * torch::clamp(ratio_new, 1.0 - clip_coef, 1.0 + clip_coef); auto pg_loss = torch::max(pg_loss1, pg_loss2).mean(); - //std::cout << "pg_loss: " << pg_loss << std::endl; - // Value loss newvalue = newvalue.view(mb_returns.sizes()); auto v_clipped = mb_values + torch::clamp(newvalue - mb_values, -vf_clip_coef, vf_clip_coef); @@ -610,25 +577,19 @@ pybind11::dict compiled_train( values.index_copy_(0, idx, newvalue.detach().squeeze(-1).to(torch::kFloat32)); 
- //std::cout << "v_loss: " << v_loss << std::endl; - - // Entropy - auto entropy_loss = entropy; // Already mean - // Total loss - auto loss = pg_loss + vf_coef*v_loss - ent_coef*entropy_loss; - - //std::cout << "loss: " << loss << std::endl; + auto loss = pg_loss + vf_coef*v_loss - ent_coef*entropy; - // Accumulate stats - pg_sum += pg_loss.detach(); - v_sum += v_loss.detach(); - ent_sum += entropy_loss.detach(); - total_sum += loss.detach(); - - // KL and clipping diagnostics (matches Python) { torch::NoGradGuard no_grad; + + // Accumulate stats + pg_sum += pg_loss.detach(); + v_sum += v_loss.detach(); + ent_sum += entropy.detach(); + total_sum += loss.detach(); + + // KL and clipping diagnostics (matches Python) auto old_kl = (-logratio).mean(); auto kl = ((ratio_new - 1) - logratio).mean(); auto cf = (ratio_new - 1.0).abs().gt(clip_coef).to(torch::kFloat32).mean(); @@ -648,19 +609,7 @@ pybind11::dict compiled_train( // Gradient accumulation and step if ((mb + 1) % accumulate_minibatches == 0) { - torch::nn::utils::clip_grad_norm_(policy_32->parameters(), max_grad_norm); - - /* - // Print grads - for (auto& param : policy_32->parameters()) { - std::cout << param.grad().abs().sum() << std::endl; - } - */ - - // Print current lr - //std::cout << "Current lr: " - // << optimizer->param_groups()[0].options().get_lr() - // << std::endl; + torch::nn::utils::clip_grad_norm_(policy->parameters(), max_grad_norm); optimizer->step(); optimizer->zero_grad(); } @@ -674,15 +623,14 @@ pybind11::dict compiled_train( // Return losses (averaged) pybind11::dict losses; - auto num_mb = static_cast(total_minibatches); - losses["pg_loss"] = (pg_sum / num_mb).item(); - losses["v_loss"] = (v_sum / num_mb).item(); - losses["entropy"] = (ent_sum / num_mb).item(); - losses["total_loss"] = (total_sum / num_mb).item(); - losses["old_approx_kl"] = (old_approx_kl_sum / num_mb).item(); - losses["approx_kl"] = (approx_kl_sum / num_mb).item(); - losses["clipfrac"] = (clipfrac_sum / 
num_mb).item(); - losses["importance"] = (importance_sum / num_mb).item(); + losses["pg_loss"] = pg_sum.item() / total_minibatches; + losses["value_loss"] = v_sum.item() / total_minibatches; + losses["entropy"] = ent_sum.item() / total_minibatches; + losses["total_loss"] = total_sum.item() / total_minibatches; + losses["old_approx_kl"] = old_approx_kl_sum.item() / total_minibatches; + losses["approx_kl"] = approx_kl_sum.item() / total_minibatches; + losses["clipfrac"] = clipfrac_sum.item() / total_minibatches; + losses["importance"] = importance_sum.item() / total_minibatches; //losses["explained_variance"] = explained_var; return losses; @@ -707,8 +655,7 @@ PYBIND11_MODULE(_C, m) { m.def("create_pufferl", &create_pufferl); py::class_>(m, "PuffeRL") - .def_readwrite("policy_16", &pufferlib::PuffeRL::policy_16) - .def_readwrite("policy_32", &pufferlib::PuffeRL::policy_32) + .def_readwrite("policy", &pufferlib::PuffeRL::policy) .def_readwrite("optimizer", &pufferlib::PuffeRL::optimizer); py::class_, torch::nn::Module> cls(m, "PolicyLSTM"); diff --git a/pufferlib/ocean/squared/squared.h b/pufferlib/ocean/squared/squared.h index 4d4d5d0c5..e7c97ff98 100644 --- a/pufferlib/ocean/squared/squared.h +++ b/pufferlib/ocean/squared/squared.h @@ -59,11 +59,9 @@ void c_reset(Squared* env) { env->c = env->size/2; env->tick = 0; int target_idx = 0; // Deterministic for testing - /* do { target_idx = rand() % tiles; } while (target_idx == tiles/2); - */ env->observations[target_idx] = TARGET; } diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index accde9034..f23164510 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -652,8 +652,8 @@ def __call__(self, name, epoch, nest=False): if epoch % self.frequency != 0: return - #if torch.cuda.is_available(): - # torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() tick = time.time() if len(self.stack) != 0 and not nest: @@ -669,8 +669,8 @@ def pop(self, end): profile['delta'] += delta 
def end(self): - #if torch.cuda.is_available(): - # torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() end = time.time() for i in range(len(self.stack)): diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index 830f60262..d57bad9ee 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -321,7 +321,6 @@ def train(self): self.ratio[:] = 1 num_minibatches = config['num_minibatches'] - #num_minibatches = 1 for mb in range(num_minibatches): profile('train_misc', epoch, nest=True) self.amp_context.__enter__() @@ -332,8 +331,6 @@ def train(self): self.terminals, self.ratio, advantages, config['gamma'], config['gae_lambda'], config['vtrace_rho_clip'], config['vtrace_c_clip']) - #print("Adv Py", advantages.mean().item()) - profile('train_copy', epoch) adv = advantages.abs().sum(axis=1) prio_weights = torch.nan_to_num(adv**a, 0, 0, 0) @@ -342,8 +339,6 @@ def train(self): self.minibatch_segments, replacement=True) mb_prio = (self.segments*prio_probs[idx, None])**-anneal_beta - #print("Prio Py", mb_prio.mean().item()) - mb_obs = self.observations[idx] mb_actions = self.actions[idx] mb_logprobs = self.logprobs[idx] @@ -355,19 +350,6 @@ def train(self): mb_returns = advantages[idx] + mb_values mb_advantages = advantages[idx] - ''' - print("mb_obs Py", mb_obs.sum()) - print("mb_actions Py", mb_actions.sum()) - print("mb_logprobs Py", mb_logprobs.min()) - print("mb_rewards Py", mb_rewards.min()) - print("mb_terminals Py", mb_terminals.min()) - print("mb_truncations Py", mb_truncations.min()) - print("mb_ratio Py", mb_ratio.min()) - print("mb_values Py", mb_values.min()) - print("mb_returns Py", mb_returns.min()) - print("mb_advantages Py", mb_advantages.min()) - ''' - profile('train_forward', epoch) if not config['use_rnn']: mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) @@ -388,46 +370,28 @@ def train(self): ratio = logratio.exp() self.ratio[idx] = ratio.detach() - ''' - 
print("newlogprob Py", newlogprob.min().item()) - print("entropy Py", entropy.min()) - print(f"logratio_new Py {logratio.min().item():.16f}") - print(f"ratio Py {ratio.min().item():.16f}") - ''' - with torch.no_grad(): old_approx_kl = (-logratio).mean() approx_kl = ((ratio - 1) - logratio).mean() clipfrac = ((ratio - 1.0).abs() > config['clip_coef']).float().mean() - #adv = advantages[idx] - #adv = compute_puff_advantage(mb_values, mb_rewards, mb_terminals, - # ratio, adv, config['gamma'], config['gae_lambda'], - # config['vtrace_rho_clip'], config['vtrace_c_clip']) adv = mb_advantages adv = mb_prio * (adv - adv.mean()) / (adv.std() + 1e-8) - #print("new advantage", adv.min().item()) - # Losses pg_loss1 = -adv * ratio pg_loss2 = -adv * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef) pg_loss = torch.max(pg_loss1, pg_loss2).mean() - #print("pg_loss Py", pg_loss.item()) - newvalue = newvalue.view(mb_returns.shape) v_clipped = mb_values + torch.clamp(newvalue - mb_values, -vf_clip, vf_clip) v_loss_unclipped = (newvalue - mb_returns) ** 2 v_loss_clipped = (v_clipped - mb_returns) ** 2 v_loss = 0.5*torch.max(v_loss_unclipped, v_loss_clipped).mean() - #print("v_loss Py", v_loss.item()) - entropy_loss = entropy.mean() loss = pg_loss + config['vf_coef']*v_loss - config['ent_coef']*entropy_loss - #print("loss Py", loss.item()) self.amp_context.__enter__() # TODO: AMP needs some debugging # This breaks vloss clipping? 
@@ -448,12 +412,6 @@ def train(self): loss.backward() if (mb + 1) % self.accumulate_minibatches == 0: torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) - # Print grads - #for param in self.policy.parameters(): - # print(param.grad.abs().sum()) - - # Print current lr - #print(f'Current lr: {self.optimizer.param_groups[0]["lr"]}') self.optimizer.step() self.optimizer.zero_grad() @@ -737,7 +695,7 @@ def dist_mean(value, device): return dist_sum(value, device) / torch.distributed.get_world_size() class Profile: - def __init__(self, frequency=5): + def __init__(self, frequency=1): self.profiles = defaultdict(lambda: defaultdict(float)) self.frequency = frequency self.stack = [] @@ -752,8 +710,8 @@ def __call__(self, name, epoch, nest=False): if epoch % self.frequency != 0: return - #if torch.cuda.is_available(): - # torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() tick = time.time() if len(self.stack) != 0 and not nest: @@ -769,8 +727,8 @@ def pop(self, end): profile['delta'] += delta def end(self): - #if torch.cuda.is_available(): - # torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() end = time.time() for i in range(len(self.stack)): From 5b0638c806d03ddd8132e5b943bb6c3406d94b4c Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Nov 2025 18:40:19 +0000 Subject: [PATCH 089/188] Mingru --- pufferlib/config/ocean/breakout.ini | 6 +- pufferlib/models.py | 128 ++++++++++++++++++++++++++++ pufferlib/ocean/torch.py | 2 +- pufferlib/pufferl.py | 31 ++----- 4 files changed, 140 insertions(+), 27 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index d261503f5..6c4ed4ac6 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -26,11 +26,11 @@ paddle_speed = 620 continuous = 0 [policy] -hidden_size = 128 +hidden_size = 256 [rnn] -input_size = 128 -hidden_size = 128 +input_size = 256 
+hidden_size = 256 [train] total_timesteps = 90_000_000 diff --git a/pufferlib/models.py b/pufferlib/models.py index fa43d7071..b17e0772a 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -8,6 +8,79 @@ import pufferlib.pytorch import pufferlib.spaces +# https://arxiv.org/abs/2410.01201v1 + +import torch +import torch.nn.functional as F +from torch.nn import Linear, Identity, Module + +def exists(v): + return v is not None + +def default(v, d): + return v if exists(v) else d + +# appendix B +# https://github.com/glassroom/heinsen_sequence + +def heinsen_associative_scan_log(log_coeffs, log_values): + a_star = log_coeffs.cumsum(dim = 1) + log_h0_plus_b_star = (log_values - a_star).logcumsumexp(dim = 1) + log_h = a_star + log_h0_plus_b_star + return log_h.exp() + +# appendix B.3 + +def g(x): + return torch.where(x >= 0, x + 0.5, x.sigmoid()) + +def log_g(x): + return torch.where(x >= 0, (F.relu(x) + 0.5).log(), -F.softplus(-x)) + +# log-space version of minGRU - B.3.1 +# they enforce the hidden states to be positive + +class minGRU(Module): + def __init__(self, dim, expansion_factor = 1., proj_out = None): + super().__init__() + + dim_inner = int(dim * expansion_factor) + proj_out = default(proj_out, expansion_factor != 1.) 
+ + self.to_hidden_and_gate = Linear(dim, dim_inner * 2, bias = False) + self.to_out = Linear(dim_inner, dim, bias = False) if proj_out else Identity() + + def forward(self, x, prev_hidden = None): + seq_len = x.shape[1] + hidden, gate = self.to_hidden_and_gate(x).chunk(2, dim = -1) + + if seq_len == 1: + # handle sequential + + hidden = g(hidden) + gate = gate.sigmoid() + out = torch.lerp(prev_hidden, hidden, gate) if exists(prev_hidden) else (hidden * gate) + else: + # parallel + + log_coeffs = -F.softplus(gate) + + log_z = -F.softplus(-gate) + log_tilde_h = log_g(hidden) + log_values = log_z + log_tilde_h + + if exists(prev_hidden): + log_values = torch.cat((prev_hidden.log(), log_values), dim = 1) + log_coeffs = F.pad(log_coeffs, (0, 0, 1, 0)) + + out = heinsen_associative_scan_log(log_coeffs, log_values) + out = out[:, -seq_len:] + + next_prev_hidden = out[:, -1:] + + out = self.to_out(out) + + return out, next_prev_hidden class Default(nn.Module): '''Default PyTorch policy. Flattens obs and applies a linear layer. @@ -97,6 +170,61 @@ def decode_actions(self, hidden): values = self.value(hidden) return logits, values +class MinGRUWrapper(nn.Module): + def __init__(self, env, policy, input_size=128, hidden_size=128): + super().__init__() + self.obs_shape = env.single_observation_space.shape + self.policy = policy + + self.input_size = input_size + self.hidden_size = hidden_size + self.is_continuous = self.policy.is_continuous + + self.mingru = minGRU(hidden_size) + + def forward_eval(self, observations, state): + '''Forward function for inference. 3x faster than using LSTM directly''' + assert state.shape[0] == observations.shape[0] + observations = observations.unsqueeze(1) + state = state.unsqueeze(1) + logits, values, state = self.forward(observations, state) + state = state.squeeze(1) + return logits, values, state + + def forward(self, observations, state): + '''Forward function for training. 
Uses LSTM for fast time-batching''' + x = observations + x_shape, space_shape = x.shape, self.obs_shape + x_n, space_n = len(x_shape), len(space_shape) + if x_shape[-space_n:] != space_shape: + raise ValueError('Invalid input tensor shape', x.shape) + + if x_n == space_n + 1: + B, TT = x_shape[0], 1 + elif x_n == space_n + 2: + B, TT = x_shape[:2] + else: + raise ValueError('Invalid input tensor shape', x.shape) + + assert state.shape[0] == B + + x = x.reshape(B*TT, *space_shape) + hidden = self.policy.encode_observations(x, state) + assert hidden.shape == (B*TT, self.input_size) + + hidden = hidden.reshape(B, TT, self.input_size) + + #hidden = hidden.transpose(0, 1) + hidden, state = self.mingru(hidden, state) + + #hidden = hidden.transpose(0, 1) + + flat_hidden = hidden.reshape(B*TT, self.hidden_size) + logits, values = self.policy.decode_actions(flat_hidden) + values = values.reshape(B, TT) + state = state.detach() + return logits, values, state + class LSTMWrapper(nn.Module): def __init__(self, env, policy, input_size=128, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index 8cf4ffe7d..25bab74f1 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -14,7 +14,7 @@ from pufferlib.models import Default as Policy from pufferlib.models import Convolutional as Conv -Recurrent = pufferlib.models.LSTMWrapper +Recurrent = pufferlib.models.MinGRUWrapper from pufferlib.pytorch import layer_init, _nativize_dtype, nativize_tensor import numpy as np diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index f74fec44b..347092942 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -113,8 +113,7 @@ def __init__(self, config, vecenv, policy, logger=None): if config['use_rnn']: n = vecenv.agents_per_batch h = policy.hidden_size - self.lstm_h = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} - self.lstm_c = {i*n: 
torch.zeros(n, h, device=device) for i in range(total_agents//n)} + self.state = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} # Minibatching & gradient accumulation minibatch_size = config['minibatch_size'] @@ -234,9 +233,8 @@ def evaluate(self): device = config['device'] if config['use_rnn']: - for k in self.lstm_h: - self.lstm_h[k].zero_() - self.lstm_c[k].zero_() + for k in self.state: + self.state[k].zero_() self.full_rows = 0 while self.full_rows < self.segments: @@ -257,26 +255,17 @@ def evaluate(self): profile('eval_forward', epoch) with torch.no_grad(), self.amp_context: - state = dict( - reward=r, - done=d, - env_id=env_id, - mask=mask, - ) - if config['use_rnn']: - state['lstm_h'] = self.lstm_h[env_id.start] - state['lstm_c'] = self.lstm_c[env_id.start] + state = self.state[env_id.start] - logits, value = self.policy.forward_eval(o_device, state) + logits, value, state = self.policy.forward_eval(o_device, state) action, logprob, _ = pufferlib.pytorch.sample_logits(logits) r = torch.clamp(r, -1, 1) profile('eval_copy', epoch) with torch.no_grad(): if config['use_rnn']: - self.lstm_h[env_id.start] = state['lstm_h'] - self.lstm_c[env_id.start] = state['lstm_c'] + self.state[env_id.start] = state # Fast path for fully vectorized envs l = self.ep_lengths[env_id.start].item() @@ -376,13 +365,9 @@ def train(self): if not config['use_rnn']: mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) - state = dict( - action=mb_actions, - lstm_h=None, - lstm_c=None, - ) + state = torch.zeros(mb_obs.shape[0], 1, self.policy.hidden_size, device=device) - logits, newvalue = self.policy(mb_obs, state) + logits, newvalue, _ = self.policy(mb_obs, state) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) profile('train_misc', epoch) From 0b50ca1c3db80bd012f1d0737301dbb0edd15189 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Nov 2025 19:32:44 +0000 Subject: [PATCH 090/188] Multilayer 
--- pufferlib/config/ocean/breakout.ini | 7 ++++--- pufferlib/models.py | 24 ++++++++++++++---------- pufferlib/pufferl.py | 5 +++-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 6c4ed4ac6..ff7f282ea 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -26,11 +26,12 @@ paddle_speed = 620 continuous = 0 [policy] -hidden_size = 256 +hidden_size = 128 [rnn] -input_size = 256 -hidden_size = 256 +input_size = 128 +hidden_size = 128 +num_layers = 3 [train] total_timesteps = 90_000_000 diff --git a/pufferlib/models.py b/pufferlib/models.py index b17e0772a..ebbb3489d 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -171,24 +171,25 @@ def decode_actions(self, hidden): return logits, values class MinGRUWrapper(nn.Module): - def __init__(self, env, policy, input_size=128, hidden_size=128): + def __init__(self, env, policy, input_size=128, hidden_size=128, num_layers=1): super().__init__() self.obs_shape = env.single_observation_space.shape self.policy = policy self.input_size = input_size self.hidden_size = hidden_size + self.num_layers = num_layers self.is_continuous = self.policy.is_continuous - self.mingru = minGRU(hidden_size) + self.mingru = nn.ModuleList([minGRU(hidden_size) for _ in range(num_layers)]) def forward_eval(self, observations, state): '''Forward function for inference. 
3x faster than using LSTM directly''' - assert state.shape[0] == observations.shape[0] + assert state.shape[1] == observations.shape[0] observations = observations.unsqueeze(1) - state = state.unsqueeze(1) - logits, values, state = self.forward(observations, state) - state = state.squeeze(1) + states = state.unsqueeze(2) + logits, values, state = self.forward(observations, states) + state = state.squeeze(2) return logits, values, state def forward(self, observations, state): @@ -206,7 +207,7 @@ def forward(self, observations, state): else: raise ValueError('Invalid input tensor shape', x.shape) - assert state.shape[0] == B + assert state.shape[1] == B x = x.reshape(B*TT, *space_shape) hidden = self.policy.encode_observations(x, state) @@ -215,14 +216,17 @@ def forward(self, observations, state): hidden = hidden.reshape(B, TT, self.input_size) #hidden = hidden.transpose(0, 1) - hidden, state = self.mingru(hidden, state) - + #states = list(state.split(self.num_layers, dim=0)) + states = [state[i] for i in range(self.num_layers)] + for i in range(self.num_layers): + hidden, states[i] = self.mingru[i](hidden, states[i]) + #hidden = hidden.transpose(0, 1) flat_hidden = hidden.reshape(B*TT, self.hidden_size) logits, values = self.policy.decode_actions(flat_hidden) values = values.reshape(B, TT) - state = state.detach() + state = torch.stack(states, dim=0).detach() return logits, values, state class LSTMWrapper(nn.Module): diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 347092942..95a3e07c2 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -113,7 +113,8 @@ def __init__(self, config, vecenv, policy, logger=None): if config['use_rnn']: n = vecenv.agents_per_batch h = policy.hidden_size - self.state = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} + l = policy.num_layers + self.state = {i*n: torch.zeros(l, n, h, device=device) for i in range(total_agents//n)} # Minibatching & gradient accumulation minibatch_size = 
config['minibatch_size'] @@ -365,7 +366,7 @@ def train(self): if not config['use_rnn']: mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) - state = torch.zeros(mb_obs.shape[0], 1, self.policy.hidden_size, device=device) + state = torch.zeros(self.policy.num_layers, mb_obs.shape[0], 1, self.policy.hidden_size, device=device) logits, newvalue, _ = self.policy(mb_obs, state) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) From d7f8880d2186e256f63a717add918d65d22a0022 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Nov 2025 21:27:26 +0000 Subject: [PATCH 091/188] progress --- pufferlib/config/ocean/breakout.ini | 8 ++++---- pufferlib/models.py | 2 ++ pufferlib/pufferl.py | 8 ++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index ff7f282ea..4971c1d82 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -26,12 +26,12 @@ paddle_speed = 620 continuous = 0 [policy] -hidden_size = 128 +hidden_size = 256 [rnn] -input_size = 128 -hidden_size = 128 -num_layers = 3 +input_size = 256 +hidden_size = 256 +num_layers = 2 [train] total_timesteps = 90_000_000 diff --git a/pufferlib/models.py b/pufferlib/models.py index ebbb3489d..5f3596aab 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -48,7 +48,9 @@ def __init__(self, dim, expansion_factor = 1., proj_out = None): proj_out = default(proj_out, expansion_factor != 1.) 
self.to_hidden_and_gate = Linear(dim, dim_inner * 2, bias = False) + nn.init.orthogonal_(self.to_hidden_and_gate.weight) self.to_out = Linear(dim_inner, dim, bias = False) if proj_out else Identity() + #nn.init.orthogonal_(self.to_out.weight) def forward(self, x, prev_hidden = None): seq_len = x.shape[1] diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 95a3e07c2..e30739113 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -720,8 +720,8 @@ def __call__(self, name, epoch, nest=False): if (epoch + 1) % self.frequency != 0: return - #if torch.cuda.is_available(): - # torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() tick = time.time() if len(self.stack) != 0 and not nest: @@ -738,8 +738,8 @@ def pop(self, end): profile['elapsed'] += delta * self.frequency def end(self): - #if torch.cuda.is_available(): - # torch.cuda.synchronize() + if torch.cuda.is_available(): + torch.cuda.synchronize() end = time.time() for i in range(len(self.stack)): From a8d1423a1755192efea9cdde0016b70ecdf8ee0e Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Nov 2025 22:06:38 +0000 Subject: [PATCH 092/188] Add torch muon --- pufferlib/pufferl.py | 64 ++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 3972e722f..a1802efde 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -145,37 +145,18 @@ def __init__(self, config, vecenv, policy, logger=None): self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) - # Optimizer - if config['optimizer'] == 'adam': - optimizer = torch.optim.Adam( - self.policy.parameters(), - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - ) - elif config['optimizer'] == 'muon': - import heavyball - 
from heavyball import ForeachMuon - warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') - heavyball.utils.compile_mode = "default" - - # # optionally a little bit better/faster alternative to newtonschulz iteration - # import heavyball.utils - # heavyball.utils.zeroth_power_mode = 'thinky_polar_express' - - # heavyball_momentum=True introduced in heavyball 2.1.1 - # recovers heavyball-1.7.2 behaviour - previously swept hyperparameters work well - optimizer = ForeachMuon( - self.policy.parameters(), - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - heavyball_momentum=True, - ) - else: - raise ValueError(f'Unknown optimizer: {config["optimizer"]}') - - self.optimizer = optimizer + self.muon = torch.optim.Muon( + [e for e in self.policy.parameters() if e.dim() > 1], + lr=config['learning_rate'], + eps=config['adam_eps'], + adjust_lr_fn='match_rms_adamw' + ) + self.adam = torch.optim.Adam( + [e for e in self.policy.parameters() if e.dim() == 1], + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + ) # Logging self.logger = logger @@ -184,7 +165,6 @@ def __init__(self, config, vecenv, policy, logger=None): # Learning rate scheduler epochs = config['total_timesteps'] // config['batch_size'] - self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) self.total_epochs = epochs # Automatic mixed precision @@ -343,6 +323,13 @@ def train(self): anneal_beta = b0 + (1 - b0)*a*self.epoch/self.total_epochs self.ratio[:] = 1 + learning_rate = config['learning_rate'] + if config['anneal_lr'] and self.epoch > 0: + lr_ratio = self.epoch / self.total_epochs + learning_rate = learning_rate * 0.5 * (1 + np.cos(np.pi * lr_ratio)) + self.muon.param_groups[0]['lr'] = learning_rate + self.adam.param_groups[0]['lr'] = learning_rate + for mb in range(self.total_minibatches): profile('train_misc', epoch) 
self.amp_context.__enter__() @@ -440,13 +427,13 @@ def train(self): loss.backward() if (mb + 1) % self.accumulate_minibatches == 0: torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) - self.optimizer.step() - self.optimizer.zero_grad() + self.muon.step() + self.adam.step() + self.muon.zero_grad() + self.adam.zero_grad() # Reprioritize experience profile('train_misc', epoch) - if config['anneal_lr']: - self.scheduler.step() y_pred = self.values.flatten() y_true = advantages.flatten() + self.values.flatten() @@ -491,7 +478,7 @@ def mean_and_log(self): 'agent_steps': agent_steps, 'uptime': time.time() - self.start_time, 'epoch': int(dist_sum(self.epoch, device)), - 'learning_rate': self.optimizer.param_groups[0]["lr"], + 'learning_rate': self.muon.param_groups[0]["lr"], **{f'environment/{k}': v for k, v in self.stats.items()}, **{f'losses/{k}': v for k, v in self.losses.items()}, **{f'performance/{k}': v['elapsed'] for k, v in self.profile}, @@ -537,7 +524,8 @@ def save_checkpoint(self): torch.save(self.uncompiled_policy.state_dict(), model_path) state = { - 'optimizer_state_dict': self.optimizer.state_dict(), + 'adam_state_dict': self.adam.state_dict(), + 'muon_state_dict': self.muon.state_dict(), 'global_step': self.global_step, 'agent_step': self.global_step, 'update': self.epoch, From bc74c3fac70d07006d303aa1299659b0a6a8ee3d Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Nov 2025 22:32:28 +0000 Subject: [PATCH 093/188] Stable --- pufferlib/config/default.ini | 2 +- pufferlib/config/ocean/breakout.ini | 2 +- pufferlib/pufferl.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 6073c651e..793fab71d 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -51,7 +51,7 @@ minibatch_size = 8192 max_minibatch_size = 32768 bptt_horizon = 64 compile = False -compile_mode = max-autotune-no-cudagraphs +compile_mode = 
reduce-overhead compile_fullgraph = True vtrace_rho_clip = 1.0 diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 4971c1d82..915539f36 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -31,7 +31,7 @@ hidden_size = 256 [rnn] input_size = 256 hidden_size = 256 -num_layers = 2 +num_layers = 1 [train] total_timesteps = 90_000_000 diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index add690406..c702d286c 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -142,7 +142,7 @@ def __init__(self, config, vecenv, policy, logger=None): self.policy = policy if config['compile']: self.policy = torch.compile(policy, mode=config['compile_mode']) - self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) + self.policy.forward_eval = torch.compile(policy.forward_eval, mode=config['compile_mode']) pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) self.muon = torch.optim.Muon( @@ -215,7 +215,7 @@ def evaluate(self): if config['use_rnn']: for k in self.state: - self.state[k].zero_() + self.state[k] = torch.zeros_like(self.state[k]) self.full_rows = 0 while self.full_rows < self.segments: From 676bdf26c3fad76357b434a436c8cc73703c0d0e Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Nov 2025 22:39:09 +0000 Subject: [PATCH 094/188] Update sweep defaults --- pufferlib/config/default.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 793fab71d..a2fee4309 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -157,7 +157,7 @@ scale = auto # but this results in jank unstable runs [sweep.train.vf_clip_coef] distribution = uniform -min = 0.1 +min = 0.5 max = 5.0 mean = 0.2 scale = auto From 05750584a211f57bf2dee8c1b4de5236361e0fab Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 5 Nov 
2025 17:31:23 +0000 Subject: [PATCH 095/188] Mamba --- pufferlib/config/ocean/breakout.ini | 38 ++++++++-------- pufferlib/models.py | 70 ++++++++++++++++++++++++++++- pufferlib/ocean/torch.py | 2 +- pufferlib/pufferl.py | 22 +++++---- 4 files changed, 102 insertions(+), 30 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 915539f36..cbaeb1bbf 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -26,33 +26,33 @@ paddle_speed = 620 continuous = 0 [policy] -hidden_size = 256 +hidden_size = 128 [rnn] -input_size = 256 -hidden_size = 256 +input_size = 128 +hidden_size = 128 num_layers = 1 [train] -total_timesteps = 90_000_000 -adam_beta1 = 0.8946507418260217 -adam_beta2 = 0.9 +total_timesteps = 120_000_000 +adam_beta1 = 0.8166332218104871 +adam_beta2 = 0.9984879989750705 adam_eps = 0.0001 batch_size = auto bptt_horizon = 64 -clip_coef = 0.19696765958267629 -ent_coef = 0.0005690816545012474 -gae_lambda = 0.747650023961198 -gamma = 0.9997053654668936 -learning_rate = 0.044482546441415506 -max_grad_norm = 2.2356112188495723 -minibatch_size = 32768 -prio_alpha = 0.98967001208896 -prio_beta0 = 0.09999999999999998 -vf_clip_coef = 2.178492167689251 -vf_coef = 1.6832989594296321 -vtrace_c_clip = 2.878171091654008 -vtrace_rho_clip = 0.7876748061547312 +clip_coef = 0.42526610231849393 +ent_coef = 0.0026822968018267775 +gae_lambda = 0.995 +gamma = 0.9731819086255716 +learning_rate = 0.04301709139429238 +max_grad_norm = 0.7029618837611082 +minibatch_size = 16384 +prio_alpha = 0.09999999999999998 +prio_beta0 = 0.8437844355214735 +vf_clip_coef = 0.807798225723059 +vf_coef = 2.9089121311247554 +vtrace_c_clip = 1.6205569942514606 +vtrace_rho_clip = 1.1777184656786774 [sweep.train.total_timesteps] distribution = log_normal diff --git a/pufferlib/models.py b/pufferlib/models.py index 5f3596aab..34ce8dce6 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -5,7 +5,6 @@ import 
torch.nn as nn import pufferlib.emulation -import pufferlib.pytorch import pufferlib.spaces # https://arxiv.org/abs/2410.01201v1 @@ -231,6 +230,75 @@ def forward(self, observations, state): state = torch.stack(states, dim=0).detach() return logits, values, state +class MambaWrapper(nn.Module): + def __init__(self, env, policy, input_size=128, hidden_size=128, num_layers=1): + super().__init__() + self.obs_shape = env.single_observation_space.shape + self.policy = policy + + self.input_size = input_size + self.hidden_size = hidden_size + self.num_layers = num_layers + self.is_continuous = self.policy.is_continuous + + from mamba_ssm import Mamba2 + self.mamba = Mamba2(d_model=hidden_size, d_state=64, d_conv=4, expand=2) + + def init_state(self, batch_size, device): + conv_state = torch.zeros( + batch_size, + self.mamba.d_conv, + self.mamba.conv1d.weight.shape[0], + device=device, + dtype=self.mamba.conv1d.weight.dtype, + ).transpose(1, 2).to(device) + ssm_state = torch.zeros( + batch_size, + self.mamba.nheads, + self.mamba.headdim, + self.mamba.d_state, + device=device, + dtype=self.mamba.in_proj.weight.dtype, + ).to(device) + return conv_state, ssm_state + + def forward_eval(self, observations, conv_state, ssm_state): + '''Forward function for inference. 3x faster than using LSTM directly''' + hidden = self.policy.encode_observations(observations, None) + hidden = hidden.unsqueeze(1) + hidden, conv_state, ssm_state = self.mamba.step(hidden, conv_state, ssm_state) + hidden = hidden.squeeze(1) + logits, values = self.policy.decode_actions(hidden) + return logits, values, conv_state, ssm_state + + def forward(self, observations): + '''Forward function for training. 
Uses LSTM for fast time-batching''' + x = observations + x_shape, space_shape = x.shape, self.obs_shape + x_n, space_n = len(x_shape), len(space_shape) + if x_shape[-space_n:] != space_shape: + raise ValueError('Invalid input tensor shape', x.shape) + + if x_n == space_n + 1: + B, TT = x_shape[0], 1 + elif x_n == space_n + 2: + B, TT = x_shape[:2] + else: + raise ValueError('Invalid input tensor shape', x.shape) + + x = x.reshape(B*TT, *space_shape) + hidden = self.policy.encode_observations(x, None) + assert hidden.shape == (B*TT, self.input_size) + + hidden = hidden.reshape(B, TT, self.input_size) + hidden = self.mamba(hidden) + + flat_hidden = hidden.reshape(B*TT, self.hidden_size) + logits, values = self.policy.decode_actions(flat_hidden) + values = values.reshape(B, TT) + return logits, values + + class LSTMWrapper(nn.Module): def __init__(self, env, policy, input_size=128, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index 25bab74f1..457d238ba 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -14,7 +14,7 @@ from pufferlib.models import Default as Policy from pufferlib.models import Convolutional as Conv -Recurrent = pufferlib.models.MinGRUWrapper +Recurrent = pufferlib.models.MambaWrapper from pufferlib.pytorch import layer_init, _nativize_dtype, nativize_tensor import numpy as np diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index c702d286c..f64c86635 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -115,6 +115,9 @@ def __init__(self, config, vecenv, policy, logger=None): h = policy.hidden_size l = policy.num_layers self.state = {i*n: torch.zeros(l, n, h, device=device) for i in range(total_agents//n)} + conv_states, ssm_states = policy.init_state(total_agents, device) + self.conv_state = {i*n: conv_states[i*n:(i+1)*n] for i in range(total_agents//n)} + self.ssm_state = {i*n: ssm_states[i*n:(i+1)*n] 
for i in range(total_agents//n)} # Minibatching & gradient accumulation minibatch_size = config['minibatch_size'] @@ -146,13 +149,13 @@ def __init__(self, config, vecenv, policy, logger=None): pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) self.muon = torch.optim.Muon( - [e for e in self.policy.parameters() if e.dim() > 1], + [e for e in self.policy.parameters() if e.dim() == 2], lr=config['learning_rate'], eps=config['adam_eps'], adjust_lr_fn='match_rms_adamw' ) self.adam = torch.optim.Adam( - [e for e in self.policy.parameters() if e.dim() == 1], + [e for e in self.policy.parameters() if e.dim() != 2], lr=config['learning_rate'], betas=(config['adam_beta1'], config['adam_beta2']), eps=config['adam_eps'], @@ -215,7 +218,8 @@ def evaluate(self): if config['use_rnn']: for k in self.state: - self.state[k] = torch.zeros_like(self.state[k]) + self.conv_state[k] = torch.zeros_like(self.conv_state[k]) + self.ssm_state[k] = torch.zeros_like(self.ssm_state[k]) self.full_rows = 0 while self.full_rows < self.segments: @@ -237,16 +241,18 @@ def evaluate(self): profile('eval_forward', epoch) with torch.no_grad(), self.amp_context: if config['use_rnn']: - state = self.state[env_id.start] + conv_state = self.conv_state[env_id.start] + ssm_state = self.ssm_state[env_id.start] - logits, value, state = self.policy.forward_eval(o_device, state) + logits, value, conv_state, ssm_state = self.policy.forward_eval(o_device, conv_state, ssm_state) action, logprob, _ = pufferlib.pytorch.sample_logits(logits) r = torch.clamp(r, -1, 1) profile('eval_copy', epoch) with torch.no_grad(): if config['use_rnn']: - self.state[env_id.start] = state + self.conv_state[env_id.start] = conv_state + self.ssm_state[env_id.start] = ssm_state # Fast path for fully vectorized envs l = self.ep_lengths[env_id.start].item() @@ -353,9 +359,7 @@ def train(self): if not config['use_rnn']: mb_obs = mb_obs.reshape(-1, 
*self.vecenv.single_observation_space.shape) - state = torch.zeros(self.policy.num_layers, mb_obs.shape[0], 1, self.policy.hidden_size, device=device) - - logits, newvalue, _ = self.policy(mb_obs, state) + logits, newvalue = self.policy(mb_obs) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) profile('train_misc', epoch) From 9df8eda2de0494cbedf0ccc6f3fbadaaee2e9084 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 5 Nov 2025 21:39:34 +0000 Subject: [PATCH 096/188] Testing new archs --- pufferlib/config/default.ini | 2 +- pufferlib/config/ocean/breakout.ini | 68 +++--- pufferlib/config/ocean/g2048.ini | 52 ++++- pufferlib/config/ocean/tetris.ini | 3 - pufferlib/models.py | 318 +++++++++++++++------------- pufferlib/ocean/torch.py | 3 +- pufferlib/pufferl.py | 47 ---- pufferlib/python_pufferl.py | 102 ++++----- pufferlib/sweep.py | 2 +- pyproject.toml | 2 +- setup.py | 2 +- 11 files changed, 307 insertions(+), 294 deletions(-) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index e63a56298..41098da92 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -163,7 +163,7 @@ scale = auto # but this results in jank unstable runs [sweep.train.vf_clip_coef] distribution = uniform -min = 0.5 +min = 0.1 max = 5.0 mean = 0.2 scale = auto diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 4b8cfff9d..39c4320d2 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -1,20 +1,15 @@ [base] package = ocean env_name = puffer_breakout -policy_name = Policy -#rnn_name = Recurrent -rnn_name = None +policy_name = Mamba +rnn_name = Recurrent [vec] -#num_envs = 1 num_envs = 4 -# Experiment - fewer cores per env to avoid clogging - [env] -#num_envs = 64 num_envs = 2048 -frameskip = 1 +frameskip = 4 width = 576 height = 330 paddle_width = 62 @@ -32,11 +27,45 @@ continuous = 0 [policy] hidden_size = 128 +num_layers = 
4 +d_state = 32 +d_conv = 4 +expand = 2 -[rnn] -input_size = 128 -hidden_size = 128 -num_layers = 1 +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 256 +mean = 128 +scale = auto + +[sweep.policy.num_layers] +distribution = int_uniform +min = 1 +max = 4 +mean = 2 +scale = auto + +[sweep.policy.d_state] +distribution = uniform_pow2 +min = 8 +max = 128 +mean = 32 +scale = auto + +[sweep.policy.d_conv] +distribution = int_uniform +min = 1 +max = 4 +mean = 2 +scale = auto + +[sweep.policy.expand] +distribution = int_uniform +min = 1 +max = 2 +mean = 1 +scale = auto [train] total_timesteps = 120_000_000 @@ -84,21 +113,6 @@ vtrace_rho_clip = 1.1777184656786774 downsample = 10 max_cost = 240 -[sweep.train.total_timesteps] -distribution = log_normal -min = 2e7 -max = 9e7 -# Experiment: up from 6e7 max -mean = 4e7 -scale = auto - -[sweep.policy.hidden_size] -distribution = uniform_pow2 -min = 16 -max = 1024 -mean = 128 -scale = auto - [sweep.env.num_envs] distribution = uniform_pow2 min = 1 diff --git a/pufferlib/config/ocean/g2048.ini b/pufferlib/config/ocean/g2048.ini index dd5c185d2..620d2f38a 100644 --- a/pufferlib/config/ocean/g2048.ini +++ b/pufferlib/config/ocean/g2048.ini @@ -1,12 +1,9 @@ [base] package = ocean env_name = puffer_g2048 -policy_name = Policy +policy_name = MinGRU rnn_name = Recurrent -[policy] -hidden_size = 256 - [rnn] input_size = 256 hidden_size = 256 @@ -17,6 +14,49 @@ num_envs = 4 [env] num_envs = 4096 +[policy] +hidden_size = 128 +num_layers = 2 +d_state = 32 +d_conv = 4 +expand = 2 + +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 256 +mean = 128 +scale = auto + +[sweep.policy.num_layers] +distribution = int_uniform +min = 1 +max = 4 +mean = 2 +scale = auto + +[sweep.policy.d_state] +distribution = uniform_pow2 +min = 8 +max = 128 +mean = 32 +scale = auto + +[sweep.policy.d_conv] +distribution = int_uniform +min = 1 +max = 4 +mean = 2 +scale = auto + +[sweep.policy.expand] +distribution = 
int_uniform +min = 1 +max = 2 +mean = 1 +scale = auto + + [train] # https://wandb.ai/kywch/pufferlib/runs/n8xml0u9?nw=nwuserkywch total_timesteps = 3_000_000_000 @@ -41,10 +81,10 @@ vf_coef = 2.0 vtrace_c_clip = 4.3 vtrace_rho_clip = 1.6 - [sweep] metric = score goal = maximize +max_cost = 300 [sweep.train.total_timesteps] distribution = log_normal @@ -65,4 +105,4 @@ distribution = logit_normal min = 0.01 mean = 0.6 max = 0.995 -scale = auto \ No newline at end of file +scale = auto diff --git a/pufferlib/config/ocean/tetris.ini b/pufferlib/config/ocean/tetris.ini index 15bc63b72..d371db4c8 100644 --- a/pufferlib/config/ocean/tetris.ini +++ b/pufferlib/config/ocean/tetris.ini @@ -23,9 +23,6 @@ hidden_size = 256 input_size = 256 hidden_size = 256 -[policy] -hidden_size = 128 - [train] # https://wandb.ai/kywch/pufferlib/runs/era6a8p6?nw=nwuserkywch total_timesteps = 3_000_000_000 diff --git a/pufferlib/models.py b/pufferlib/models.py index 2724c362f..e18e72295 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -39,7 +39,7 @@ def log_g(x): # log-space version of minGRU - B.3.1 # they enforce the hidden states to be positive -class minGRU(Module): +class MinGRULayer(Module): def __init__(self, dim, expansion_factor = 1., proj_out = None): super().__init__() @@ -83,48 +83,45 @@ def forward(self, x, prev_hidden = None): return out, next_prev_hidden -class Default(nn.Module): - '''Default PyTorch policy. Flattens obs and applies a linear layer. - - PufferLib is not a framework. It does not enforce a base class. - You can use any PyTorch policy that returns actions and values. - We structure our forward methods as encode_observations and decode_actions - to make it easier to wrap policies with LSTMs. You can do that and use - our LSTM wrapper or implement your own. To port an existing policy - for use with our LSTM wrapper, simply put everything from forward() before - the recurrent cell into encode_observations and put everything after - into decode_actions. 
- ''' +class DefaultEncoder(nn.Module): def __init__(self, env, hidden_size=128): super().__init__() - self.hidden_size = hidden_size - self.is_multidiscrete = isinstance(env.single_action_space, - pufferlib.spaces.MultiDiscrete) - self.is_continuous = isinstance(env.single_action_space, - pufferlib.spaces.Box) - - self.input_size = hidden_size - self.obs_shape = env.single_observation_space.shape try: self.is_dict_obs = isinstance(env.env.observation_space, pufferlib.spaces.Dict) except: self.is_dict_obs = isinstance(env.observation_space, pufferlib.spaces.Dict) if self.is_dict_obs: - self.dtype = pufferlib.pytorch.nativize_dtype(env.emulated) + dtype = pufferlib.pytorch.nativize_dtype(env.emulated) input_size = int(sum(np.prod(v.shape) for v in env.env.observation_space.values())) - self.encoder = nn.Linear(input_size, self.hidden_size) else: num_obs = np.prod(env.single_observation_space.shape) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - torch.manual_seed(42) - torch.cuda.manual_seed(42) - self.encoder = torch.nn.Sequential( - pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), - nn.GELU(), - ) - + dtype = env.single_observation_space.dtype + + self.dtype = dtype + self.encoder = torch.nn.Sequential( + pufferlib.pytorch.layer_init(nn.Linear(num_obs, hidden_size)), + nn.GELU(), + ) + + def forward(self, observations): + batch_size = observations.shape[0] + if self.is_dict_obs: + observations = pufferlib.pytorch.nativize_tensor(observations, self.dtype) + observations = torch.cat([v.view(batch_size, -1) for v in observations.values()], dim=1) + else: + observations = observations.view(batch_size, -1) + + return self.encoder(observations.float()) + +class DefaultDecoder(nn.Module): + def __init__(self, env, hidden_size=128): + super().__init__() + self.is_multidiscrete = isinstance(env.single_action_space, + pufferlib.spaces.MultiDiscrete) + self.is_continuous = isinstance(env.single_action_space, + 
pufferlib.spaces.Box) + if self.is_multidiscrete: self.action_nvec = tuple(env.single_action_space.nvec) num_atns = sum(self.action_nvec) @@ -143,32 +140,8 @@ def __init__(self, env, hidden_size=128): self.value = pufferlib.pytorch.layer_init( nn.Linear(hidden_size, 1), std=1) - self.lstm = nn.LSTM(hidden_size, hidden_size) - nn.init.orthogonal_(self.lstm.weight_ih_l0, 1.0) - nn.init.orthogonal_(self.lstm.weight_hh_l0, 1.0) - self.lstm.bias_ih_l0.data.zero_() - self.lstm.bias_hh_l0.data.zero_() - self.cell = torch.nn.LSTMCell(hidden_size, hidden_size) - self.cell.weight_ih = self.lstm.weight_ih_l0 - self.cell.weight_hh = self.lstm.weight_hh_l0 - self.cell.bias_ih = self.lstm.bias_ih_l0 - self.cell.bias_hh = self.lstm.bias_hh_l0 - - def encode_observations(self, observations, state=None): - '''Encodes a batch of observations into hidden states. Assumes - no time dimension (handled by LSTM wrappers).''' - batch_size = observations.shape[0] - if self.is_dict_obs: - observations = pufferlib.pytorch.nativize_tensor(observations, self.dtype) - observations = torch.cat([v.view(batch_size, -1) for v in observations.values()], dim=1) - else: - observations = observations.view(batch_size, -1) - return self.encoder(observations.float()) - - def decode_actions(self, hidden): - '''Decodes a batch of hidden states into (multi)discrete actions. 
- Assumes no time dimension (handled by LSTM wrappers).''' + def forward(self, hidden): if self.is_multidiscrete: logits = self.decoder(hidden).split(self.action_nvec, dim=1) elif self.is_continuous: @@ -181,135 +154,194 @@ def decode_actions(self, hidden): values = self.value(hidden) return logits, values + -class MinGRUWrapper(nn.Module): - def __init__(self, env, policy, input_size=128, hidden_size=128, num_layers=1): +class Default(nn.Module): + def __init__(self, env, hidden_size=128, num_layers=1): super().__init__() - self.obs_shape = env.single_observation_space.shape - self.policy = policy - - self.input_size = input_size self.hidden_size = hidden_size + self.input_size = hidden_size self.num_layers = num_layers - self.is_continuous = self.policy.is_continuous - - self.mingru = nn.ModuleList([minGRU(hidden_size) for _ in range(num_layers)]) - - def forward_eval(self, observations, state): + self.obs_shape = env.single_observation_space.shape + self.encoder = DefaultEncoder(env, hidden_size) + self.decoder = DefaultDecoder(env, hidden_size) + + self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers) + self.cell = nn.ModuleList([torch.nn.LSTMCell(hidden_size, hidden_size) for _ in range(num_layers)]) + for i in range(num_layers): + cell = self.cell[i] + + w_ih = getattr(self.lstm, f'weight_ih_l{i}') + w_hh = getattr(self.lstm, f'weight_hh_l{i}') + b_ih = getattr(self.lstm, f'bias_ih_l{i}') + b_hh = getattr(self.lstm, f'bias_hh_l{i}') + + nn.init.orthogonal_(w_ih, 1.0) + nn.init.orthogonal_(w_hh, 1.0) + b_ih.data.zero_() + b_hh.data.zero_() + + cell.weight_ih = w_ih + cell.weight_hh = w_hh + cell.bias_ih = b_ih + cell.bias_hh = b_hh + + def initial_state(self, batch_size, device): + h = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device) + c = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device) + return h, c + + def forward_eval(self, x, state): '''Forward function for inference. 
3x faster than using LSTM directly''' - assert state.shape[1] == observations.shape[0] - observations = observations.unsqueeze(1) - states = state.unsqueeze(2) - logits, values, state = self.forward(observations, states) - state = state.squeeze(2) - return logits, values, state + assert state[0].shape[1] == state[1].shape[1] == x.shape[0], 'LSTM state must be (h, c)' + h = self.encoder(x) + lstm_h, lstm_c = state + for i in range(self.num_layers): + h, c = self.cell[i](h, (lstm_h[i], lstm_c[i])) + lstm_h[i] = h + lstm_c[i] = c - def forward(self, observations, state): + logits, values = self.decoder(h) + return logits, values, (lstm_h, lstm_c) + + def forward(self, x): '''Forward function for training. Uses LSTM for fast time-batching''' - x = observations x_shape, space_shape = x.shape, self.obs_shape x_n, space_n = len(x_shape), len(space_shape) - if x_shape[-space_n:] != space_shape: - raise ValueError('Invalid input tensor shape', x.shape) + assert x_shape[-space_n:] == space_shape, f'Invalid input tensor shape {x.shape} != {space_shape}' - if x_n == space_n + 1: - B, TT = x_shape[0], 1 - elif x_n == space_n + 2: - B, TT = x_shape[:2] - else: - raise ValueError('Invalid input tensor shape', x.shape) + B, TT = x_shape[:2] + x = x.reshape(B*TT, *space_shape) + h = self.encoder(x) + assert h.shape == (B*TT, self.input_size) + h = h.reshape(B, TT, self.input_size) - assert state.shape[1] == B + h = h.transpose(0, 1) + h, (lstm_h, lstm_c) = self.lstm.forward(h) + h = h.transpose(0, 1) - x = x.reshape(B*TT, *space_shape) - hidden = self.policy.encode_observations(x, state) - assert hidden.shape == (B*TT, self.input_size) + flat_hidden = h.reshape(B*TT, self.hidden_size) + logits, values = self.decoder(flat_hidden) + values = values.reshape(B, TT) + return logits, values - hidden = hidden.reshape(B, TT, self.input_size) +class MinGRU(nn.Module): + def __init__(self, env, hidden_size=128, num_layers=1, **kwargs): + super().__init__() + self.hidden_size = hidden_size + 
self.input_size = hidden_size + self.obs_shape = env.single_observation_space.shape + self.encoder = DefaultEncoder(env, hidden_size) + self.decoder = DefaultDecoder(env, hidden_size) - #hidden = hidden.transpose(0, 1) - #states = list(state.split(self.num_layers, dim=0)) - states = [state[i] for i in range(self.num_layers)] + self.num_layers = num_layers + self.mingru = nn.ModuleList([MinGRULayer(hidden_size) for _ in range(num_layers)]) + + def initial_state(self, batch_size, device): + state = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device) + return (state,) + + def forward_eval(self, x, state): + state = state[0] + assert state.shape[1] == x.shape[0] + h = self.encoder(x) + h = h.unsqueeze(1) + state = state.unsqueeze(2) for i in range(self.num_layers): - hidden, states[i] = self.mingru[i](hidden, states[i]) + h, state[i] = self.mingru[i](h, state[i]) - #hidden = hidden.transpose(0, 1) + h = h.squeeze(1) + state = state.squeeze(2) + logits, values = self.decoder(h) + return logits, values, (state,) - flat_hidden = hidden.reshape(B*TT, self.hidden_size) - logits, values = self.policy.decode_actions(flat_hidden) + def forward(self, x): + '''Forward function for training. 
Uses LSTM for fast time-batching''' + x_shape, space_shape = x.shape, self.obs_shape + x_n, space_n = len(x_shape), len(space_shape) + assert x_shape[-space_n:] == space_shape, f'Invalid input tensor shape {x.shape} != {space_shape}' + + B, TT = x_shape[:2] + x = x.reshape(B*TT, *space_shape) + h = self.encoder(x) + assert h.shape == (B*TT, self.input_size) + h = h.reshape(B, TT, self.input_size) + + state = self.initial_state(B, h.device)[0].unsqueeze(2) + for i in range(self.num_layers): + h, _ = self.mingru[i](h, state[i]) + + flat_hidden = h.reshape(B*TT, self.hidden_size) + logits, values = self.decoder(flat_hidden) values = values.reshape(B, TT) - state = torch.stack(states, dim=0).detach() - return logits, values, state + return logits, values -class MambaWrapper(nn.Module): - def __init__(self, env, policy, input_size=128, hidden_size=128, num_layers=1): +class Mamba(nn.Module): + def __init__(self, env, hidden_size=128, num_layers=1, d_state=32, d_conv=4, expand=1): super().__init__() self.obs_shape = env.single_observation_space.shape - self.policy = policy - - self.input_size = input_size self.hidden_size = hidden_size - self.num_layers = num_layers - self.is_continuous = self.policy.is_continuous + self.input_size = hidden_size + self.obs_shape = env.single_observation_space.shape + self.encoder = DefaultEncoder(env, hidden_size) + self.decoder = DefaultDecoder(env, hidden_size) + self.num_layers = num_layers from mamba_ssm import Mamba2 - self.mamba = Mamba2(d_model=hidden_size, d_state=64, d_conv=4, expand=2) + self.mamba = nn.ModuleList([Mamba2(d_model=hidden_size, d_state=d_state, d_conv=d_conv, expand=expand) + for _ in range(num_layers)]) - def init_state(self, batch_size, device): + def initial_state(self, batch_size, device): conv_state = torch.zeros( + self.num_layers, batch_size, - self.mamba.d_conv, - self.mamba.conv1d.weight.shape[0], + self.mamba[0].d_conv, + self.mamba[0].conv1d.weight.shape[0], device=device, - 
dtype=self.mamba.conv1d.weight.dtype, - ).transpose(1, 2).to(device) + dtype=self.mamba[0].conv1d.weight.dtype, + ).transpose(2, 3).to(device) ssm_state = torch.zeros( + self.num_layers, batch_size, - self.mamba.nheads, - self.mamba.headdim, - self.mamba.d_state, + self.mamba[0].nheads, + self.mamba[0].headdim, + self.mamba[0].d_state, device=device, - dtype=self.mamba.in_proj.weight.dtype, + dtype=self.mamba[0].in_proj.weight.dtype, ).to(device) return conv_state, ssm_state - def forward_eval(self, observations, conv_state, ssm_state): - '''Forward function for inference. 3x faster than using LSTM directly''' - hidden = self.policy.encode_observations(observations, None) - hidden = hidden.unsqueeze(1) - hidden, conv_state, ssm_state = self.mamba.step(hidden, conv_state, ssm_state) - hidden = hidden.squeeze(1) - logits, values = self.policy.decode_actions(hidden) - return logits, values, conv_state, ssm_state + def forward_eval(self, x, state): + h = self.encoder(x) + h = h.unsqueeze(1) + conv_state, ssm_state = state + for i in range(self.num_layers): + h, conv_state[i], ssm_state[i] = self.mamba[i].step(h, conv_state[i], ssm_state[i]) - def forward(self, observations): - '''Forward function for training. 
Uses LSTM for fast time-batching''' - x = observations + state = (conv_state, ssm_state) + h = h.squeeze(1) + logits, values = self.decoder(h) + return logits, values, state + + def forward(self, x): x_shape, space_shape = x.shape, self.obs_shape x_n, space_n = len(x_shape), len(space_shape) - if x_shape[-space_n:] != space_shape: - raise ValueError('Invalid input tensor shape', x.shape) - - if x_n == space_n + 1: - B, TT = x_shape[0], 1 - elif x_n == space_n + 2: - B, TT = x_shape[:2] - else: - raise ValueError('Invalid input tensor shape', x.shape) + assert x_shape[-space_n:] == space_shape, f'Invalid input tensor shape {x.shape} != {space_shape}' - x = x.reshape(B*TT, *space_shape) - hidden = self.policy.encode_observations(x, None) - assert hidden.shape == (B*TT, self.input_size) + B, TT = x_shape[:2] + x = x.reshape(B*TT, *space_shape) + h = self.encoder(x) + assert h.shape == (B*TT, self.input_size) + h = h.reshape(B, TT, self.input_size) - hidden = hidden.reshape(B, TT, self.input_size) - hidden = self.mamba(hidden) + for i in range(self.num_layers): + h = self.mamba[i](h) - flat_hidden = hidden.reshape(B*TT, self.hidden_size) - logits, values = self.policy.decode_actions(flat_hidden) + flat_hidden = h.reshape(B*TT, self.hidden_size) + logits, values = self.decoder(flat_hidden) values = values.reshape(B, TT) return logits, values - class LSTMWrapper(nn.Module): def __init__(self, env, policy, hidden_size=128): '''Wraps your policy with an LSTM without letting you shoot yourself in the diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index 5bd2776fc..6de522dc4 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -13,8 +13,9 @@ import pufferlib.models from pufferlib.models import Default as Policy +from pufferlib.models import MinGRU, Mamba from pufferlib.models import Convolutional as Conv -Recurrent = pufferlib.models.MambaWrapper +Recurrent = pufferlib.models.LSTMWrapper from pufferlib.pytorch import layer_init, 
_nativize_dtype, nativize_tensor import numpy as np diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 796d183da..e7471c0af 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -89,16 +89,8 @@ def close(self): class PuffeRL: - #def __init__(self, config, vecenv, policy, logger=None, verbose=True): - #def __init__(self, config, policy, logger=None, verbose=True): def __init__(self, config, logger=None, verbose=True): # Backend perf optimization - #torch.set_float32_matmul_precision('high') - torch.backends.cudnn.deterministic = True #config['torch_deterministic'] - torch.backends.cudnn.benchmark = False #True - torch.backends.cuda.matmul.allow_tf32 = False - torch.backends.cudnn.allow_tf32 = False - num_envs = 4096 self.num_envs = num_envs grid_size = 11 @@ -167,15 +159,7 @@ def __init__(self, config, logger=None, verbose=True): # LSTM if config['use_rnn']: - n = vecenv.agents_per_batch - h = policy.hidden_size - #l = policy.num_layers - #self.state = {i*n: torch.zeros(l, n, h, device=device) for i in range(total_agents//n)} - #conv_states, ssm_states = policy.init_state(total_agents, device) - #self.conv_state = {i*n: conv_states[i*n:(i+1)*n] for i in range(total_agents//n)} - #self.ssm_state = {i*n: ssm_states[i*n:(i+1)*n] for i in range(total_agents//n)} n = self.agents_per_batch - #h = policy.hidden_size h = 128 self.lstm_h = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} self.lstm_c = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} @@ -192,33 +176,11 @@ def __init__(self, config, logger=None, verbose=True): f'minibatch_size {self.minibatch_size} must be divisible by bptt_horizon {horizon}' ) - # Torch compile - self.uncompiled_policy = policy - self.policy = policy - if config['compile']: - self.policy = torch.compile(policy, mode=config['compile_mode']) - self.policy.forward_eval = torch.compile(policy.forward_eval, mode=config['compile_mode']) - pufferlib.pytorch.sample_logits = 
torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) - - self.muon = torch.optim.Muon( - [e for e in self.policy.parameters() if e.dim() == 2], - lr=config['learning_rate'], - eps=config['adam_eps'], - adjust_lr_fn='match_rms_adamw' - ) - self.adam = torch.optim.Adam( - [e for e in self.policy.parameters() if e.dim() != 2], - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - ) - ''' # Logging self.logger = logger if logger is None: self.logger = Logger(config) - # Learning rate scheduler epochs = config['total_timesteps'] // config['batch_size'] self.total_epochs = epochs @@ -379,15 +341,6 @@ def train(self): # Reprioritize experience profile('train_misc', epoch) - #if config['anneal_lr']: - # self.scheduler.step() - - #y_pred = self.values.flatten() - #y_true = advantages.flatten() + self.values.flatten() - #var_y = y_true.var() - #explained_var = torch.nan if var_y == 0 else (1 - (y_true - y_pred).var() / var_y).item() - #losses['explained_variance'] = explained_var - profile.end() logs = None self.epoch += 1 diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index d57bad9ee..d38e52157 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -52,7 +52,7 @@ class PuffeRL: def __init__(self, config, vecenv, policy, logger=None, verbose=True): # Backend perf optimization - torch.set_float32_matmul_precision('high') + torch.backends.cudnn.conv.fp32_precision = 'tf32' torch.backends.cudnn.deterministic = config['torch_deterministic'] torch.backends.cudnn.benchmark = True @@ -104,12 +104,10 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): self.ep_indices = torch.arange(total_agents, device=device, dtype=torch.int32) self.free_idx = total_agents - # LSTM + # Recurrent cell if config['use_rnn']: n = vecenv.agents_per_batch - h = policy.hidden_size - self.lstm_h = {i*n: torch.zeros(n, h, device=device) for i in 
range(total_agents//n)} - self.lstm_c = {i*n: torch.zeros(n, h, device=device) for i in range(total_agents//n)} + self.state = {i*n: policy.initial_state(n, device=device) for i in range(total_agents//n)} # Minibatching & gradient accumulation minibatch_size = config['minibatch_size'] @@ -127,41 +125,29 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): self.policy = policy if config['compile']: self.policy = torch.compile(policy, mode=config['compile_mode']) - self.policy.forward_eval = torch.compile(policy, mode=config['compile_mode']) + self.policy.forward_eval = torch.compile(policy.forward_eval, mode=config['compile_mode']) pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) - # Optimizer - if config['optimizer'] == 'adam': - optimizer = torch.optim.Adam( - self.policy.parameters(), - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - ) - elif config['optimizer'] == 'muon': - from heavyball import ForeachMuon - warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') - import heavyball.utils - heavyball.utils.compile_mode = config['compile_mode'] if config['compile'] else None - optimizer = ForeachMuon( - self.policy.parameters(), - lr=config['learning_rate'], - betas=(config['adam_beta1'], config['adam_beta2']), - eps=config['adam_eps'], - ) - else: - raise ValueError(f'Unknown optimizer: {config["optimizer"]}') - - self.optimizer = optimizer + self.muon = torch.optim.Muon( + [e for e in self.policy.parameters() if e.dim() == 2], + lr=config['learning_rate'], + eps=config['adam_eps'], + adjust_lr_fn='match_rms_adamw' + ) + self.adam = torch.optim.Adam( + [e for e in self.policy.parameters() if e.dim() != 2], + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + ) # Logging self.logger = logger if logger is None: - self.logger = 
Logger(config) + self.logger = Logger(config, policy.__class__.__name__) # Learning rate scheduler epochs = max(1, config['total_timesteps'] // config['batch_size']) - self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs) self.total_epochs = epochs # Automatic mixed precision @@ -212,9 +198,8 @@ def evaluate(self): device = config['device'] if config['use_rnn']: - for k in self.lstm_h: - self.lstm_h[k] = torch.zeros(self.lstm_h[k].shape, device=device) - self.lstm_c[k] = torch.zeros(self.lstm_c[k].shape, device=device) + for k, tensors in self.state.items(): + self.state[k] = [torch.zeros_like(e) for e in tensors] self.full_rows = 0 while self.full_rows < self.segments: @@ -235,26 +220,17 @@ def evaluate(self): profile('eval_forward', epoch) with torch.no_grad(), self.amp_context: - state = dict( - reward=r, - done=d, - env_id=env_id, - mask=mask, - ) - if config['use_rnn']: - state['lstm_h'] = self.lstm_h[env_id.start] - state['lstm_c'] = self.lstm_c[env_id.start] + state = self.state[env_id.start] - logits, value = self.policy.forward_eval(o_device, state) + logits, value, state = self.policy.forward_eval(o_device, state) action, logprob, _ = pufferlib.pytorch.sample_logits(logits) r = torch.clamp(r, -1, 1) profile('eval_copy', epoch) with torch.no_grad(): if config['use_rnn']: - self.lstm_h[env_id.start] = state['lstm_h'] - self.lstm_c[env_id.start] = state['lstm_c'] + self.state[env_id.start] = state # Fast path for fully vectorized envs l = self.ep_lengths[env_id.start].item() @@ -320,6 +296,13 @@ def train(self): anneal_beta = b0 + (1 - b0)*a*self.epoch/self.total_epochs self.ratio[:] = 1 + learning_rate = config['learning_rate'] + if config['anneal_lr'] and self.epoch > 0: + lr_ratio = self.epoch / self.total_epochs + learning_rate = learning_rate * 0.5 * (1 + np.cos(np.pi * lr_ratio)) + self.muon.param_groups[0]['lr'] = learning_rate + self.adam.param_groups[0]['lr'] = learning_rate + num_minibatches = 
config['num_minibatches'] for mb in range(num_minibatches): profile('train_misc', epoch, nest=True) @@ -354,14 +337,7 @@ def train(self): if not config['use_rnn']: mb_obs = mb_obs.reshape(-1, *self.vecenv.single_observation_space.shape) - state = dict( - action=mb_actions, - lstm_h=None, - lstm_c=None, - ) - - logits, newvalue = self.policy(mb_obs, state) - #print("logits Py", logits.min().item()) + logits, newvalue = self.policy(mb_obs) actions, newlogprob, entropy = pufferlib.pytorch.sample_logits(logits, action=mb_actions) profile('train_misc', epoch) @@ -412,14 +388,13 @@ def train(self): loss.backward() if (mb + 1) % self.accumulate_minibatches == 0: torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) - self.optimizer.step() - self.optimizer.zero_grad() + self.muon.step() + self.adam.step() + self.muon.zero_grad() + self.adam.zero_grad() # Reprioritize experience profile('train_misc', epoch) - if config['anneal_lr']: - self.scheduler.step() - y_pred = self.values.flatten() y_true = advantages.flatten() + self.values.flatten() var_y = y_true.var() @@ -463,7 +438,7 @@ def mean_and_log(self): 'agent_steps': agent_steps, 'uptime': time.time() - self.start_time, 'epoch': int(dist_sum(self.epoch, device)), - 'learning_rate': self.optimizer.param_groups[0]["lr"], + 'learning_rate': self.adam.param_groups[0]["lr"], **{f'environment/{k}': v for k, v in self.stats.items()}, **{f'losses/{k}': v for k, v in self.losses.items()}, **{f'performance/{k}': v['elapsed'] for k, v in self.profile}, @@ -511,7 +486,8 @@ def save_checkpoint(self): torch.save(self.uncompiled_policy.state_dict(), model_path) state = { - 'optimizer_state_dict': self.optimizer.state_dict(), + 'adam_state_dict': self.adam.state_dict(), + 'muon_state_dict': self.muon.state_dict(), 'global_step': self.global_step, 'agent_step': self.global_step, 'update': self.epoch, @@ -792,9 +768,9 @@ def downsample(arr, m): return np.concatenate([downsampled, [last]]) class Logger: - def 
__init__(self, args): + def __init__(self, args, policy_name): self.run_id = str(int(1000*time.time())) - root = os.path.join(args['data_dir'], 'logs', args['env']) + root = os.path.join(args['data_dir'], 'logs', policy_name, args['env']) if not os.path.exists(root): os.makedirs(root) diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 7133b513c..16642c2b1 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -129,7 +129,7 @@ def unnormalize(self, value): def _params_from_puffer_sweep(sweep_config): param_spaces = {} for name, param in sweep_config.items(): - if name in ('method', 'metric', 'goal', 'downsample', 'use_gpu', 'prune_pareto'): + if name in ('method', 'metric', 'goal', 'downsample', 'use_gpu', 'prune_pareto', 'max_cost'): continue assert isinstance(param, dict) diff --git a/pyproject.toml b/pyproject.toml index a7d7ea99d..e31ba58d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -240,7 +240,7 @@ puffer = "pufferlib.pufferl:main" Homepage = "https://puffer.ai" [build-system] -requires = ["setuptools", "wheel", "Cython", "numpy<2.0", "torch"] +requires = ["setuptools", "wheel", "Cython", "numpy<2.0", "torch", "pybind11"] build-backend = "setuptools.build_meta" [tool.uv] diff --git a/setup.py b/setup.py index b1043d8ea..d3bf8c339 100644 --- a/setup.py +++ b/setup.py @@ -281,7 +281,7 @@ def run(self): if not NO_TRAIN: install_requires += [ - 'torch', + 'torch>=2.9', 'psutil', 'nvidia-ml-py', 'rich', From 35530d59d374994d0ea1db06064f52f060028a42 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 6 Nov 2025 17:33:13 +0000 Subject: [PATCH 097/188] Minor --- pufferlib/ocean/constellation/constellation.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pufferlib/ocean/constellation/constellation.c b/pufferlib/ocean/constellation/constellation.c index c9800c3bf..81f8121b8 100644 --- a/pufferlib/ocean/constellation/constellation.c +++ b/pufferlib/ocean/constellation/constellation.c @@ -206,13 +206,12 @@ 
const char* format_tick_label(double value) { } if (fabs(value) < 0.01 || fabs(value) > 10000) { - snprintf(buffer, sizeof(buffer), "%.2e", value); + snprintf(buffer, sizeof(buffer), "%.2e\0", value); } else { - snprintf(buffer, sizeof(buffer), "%.*f", precision, value); - - char *end = buffer + strlen(buffer) - 1; - while (end > buffer && *end == '0') *end-- = '\0'; - if (end > buffer && *end == '.') *end = '\0'; + snprintf(buffer, sizeof(buffer), "%.2f\0", value); + //char *end = buffer + strlen(buffer) - 1; + //while (end > buffer && *end == '0') *end-- = '\0'; + //if (end > buffer && *end == '.') *end = '\0'; } return buffer; @@ -736,7 +735,7 @@ void compute_constellation(Dataset *data, int* env_idxs, float* env_dists, int main(void) { - FILE *file = fopen("pufferlib/ocean/constellation/all_cache.json", "r"); + FILE *file = fopen("pufferlib/ocean/constellation/default.json", "r"); if (!file) { printf("Error opening file\n"); return 1; @@ -930,7 +929,6 @@ int main(void) { PlotArgs args2 = DEFAULT_PLOT_ARGS; RenderTexture2D fig2 = LoadRenderTexture(args2.width, args2.height); //SetTextureFilter(fig2.texture, TEXTURE_FILTER_POINT); - args2.left_margin = 50; args2.right_margin = 50; args2.log_x = true; int fig2_env_idx = 1; @@ -938,7 +936,7 @@ int main(void) { bool fig2_x_active = false; int fig2_x_idx = 1; bool fig2_y_active = false; - int fig2_y_idx = 2; + int fig2_y_idx = 3; int fig2_color_idx = 1; bool fig2_color_active = false; @@ -1130,6 +1128,7 @@ int main(void) { args2.x_label = x_label; args2.y_label = y_label; args2.top_margin = 20; + args2.left_margin = 100; BeginTextureMode(fig2); ClearBackground(PUFF_BACKGROUND); From 6efab69cc5a981cfe234f961d1ea4014ed5fafdc Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 6 Nov 2025 18:22:03 +0000 Subject: [PATCH 098/188] Ready for sweeps --- pufferlib/config/ocean/breakout.ini | 4 +-- pufferlib/config/ocean/g2048.ini | 9 +++--- pufferlib/config/ocean/tetris.ini | 50 ++++++++++++++++++++++------- 
pufferlib/models.py | 2 +- pufferlib/python_pufferl.py | 42 ++++++++++++++++++------ 5 files changed, 80 insertions(+), 27 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 39c4320d2..c4e848625 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -1,7 +1,7 @@ [base] package = ocean env_name = puffer_breakout -policy_name = Mamba +policy_name = MinGRU rnn_name = Recurrent [vec] @@ -27,7 +27,7 @@ continuous = 0 [policy] hidden_size = 128 -num_layers = 4 +num_layers = 3 d_state = 32 d_conv = 4 expand = 2 diff --git a/pufferlib/config/ocean/g2048.ini b/pufferlib/config/ocean/g2048.ini index 620d2f38a..50e82a574 100644 --- a/pufferlib/config/ocean/g2048.ini +++ b/pufferlib/config/ocean/g2048.ini @@ -1,7 +1,7 @@ [base] package = ocean env_name = puffer_g2048 -policy_name = MinGRU +policy_name = Policy rnn_name = Recurrent [rnn] @@ -15,8 +15,8 @@ num_envs = 4 num_envs = 4096 [policy] -hidden_size = 128 -num_layers = 2 +hidden_size = 256 +num_layers = 1 d_state = 32 d_conv = 4 expand = 2 @@ -84,7 +84,8 @@ vtrace_rho_clip = 1.6 [sweep] metric = score goal = maximize -max_cost = 300 +#max_cost = 300 +max_cost = 3000 [sweep.train.total_timesteps] distribution = log_normal diff --git a/pufferlib/config/ocean/tetris.ini b/pufferlib/config/ocean/tetris.ini index d371db4c8..15e69b07b 100644 --- a/pufferlib/config/ocean/tetris.ini +++ b/pufferlib/config/ocean/tetris.ini @@ -18,10 +18,45 @@ n_noise_obs = 0 [policy] hidden_size = 256 +num_layers = 1 +d_state = 32 +d_conv = 4 +expand = 2 -[rnn] -input_size = 256 -hidden_size = 256 +[sweep.policy.hidden_size] +distribution = uniform_pow2 +min = 16 +max = 512 +mean = 128 +scale = auto + +[sweep.policy.num_layers] +distribution = int_uniform +min = 1 +max = 4 +mean = 2 +scale = auto + +[sweep.policy.d_state] +distribution = uniform_pow2 +min = 8 +max = 128 +mean = 32 +scale = auto + +[sweep.policy.d_conv] +distribution = int_uniform +min = 1 
+max = 4 +mean = 2 +scale = auto + +[sweep.policy.expand] +distribution = int_uniform +min = 1 +max = 2 +mean = 1 +scale = auto [train] # https://wandb.ai/kywch/pufferlib/runs/era6a8p6?nw=nwuserkywch @@ -50,7 +85,7 @@ vtrace_rho_clip = 0.70 [sweep] metric = score goal = maximize -max_cost = 300 +max_cost = 3600 [sweep.train.total_timesteps] distribution = log_normal @@ -80,13 +115,6 @@ mean = 0.95 max = 0.999 scale = auto -[sweep.policy.hidden_size] -distribution = uniform_pow2 -min = 16 -max = 1024 -mean = 128 -scale = auto - [sweep.env.num_envs] distribution = uniform_pow2 min = 1 diff --git a/pufferlib/models.py b/pufferlib/models.py index e18e72295..d40f36394 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -157,7 +157,7 @@ def forward(self, hidden): class Default(nn.Module): - def __init__(self, env, hidden_size=128, num_layers=1): + def __init__(self, env, hidden_size=128, num_layers=1, **kwargs): super().__init__() self.hidden_size = hidden_size self.input_size = hidden_size diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index d38e52157..ddc1da423 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -128,6 +128,26 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): self.policy.forward_eval = torch.compile(policy.forward_eval, mode=config['compile_mode']) pufferlib.pytorch.sample_logits = torch.compile(pufferlib.pytorch.sample_logits, mode=config['compile_mode']) + import heavyball + from heavyball import ForeachMuon + warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') + heavyball.utils.compile_mode = "default" + + # # optionally a little bit better/faster alternative to newtonschulz iteration + # import heavyball.utils + # heavyball.utils.zeroth_power_mode = 'thinky_polar_express' + + # heavyball_momentum=True introduced in heavyball 2.1.1 + # recovers heavyball-1.7.2 behaviour - previously swept hyperparameters work well + self.optimizer = 
ForeachMuon( + self.policy.parameters(), + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + heavyball_momentum=True, + ) + + ''' self.muon = torch.optim.Muon( [e for e in self.policy.parameters() if e.dim() == 2], lr=config['learning_rate'], @@ -140,6 +160,7 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): betas=(config['adam_beta1'], config['adam_beta2']), eps=config['adam_eps'], ) + ''' # Logging self.logger = logger @@ -300,8 +321,9 @@ def train(self): if config['anneal_lr'] and self.epoch > 0: lr_ratio = self.epoch / self.total_epochs learning_rate = learning_rate * 0.5 * (1 + np.cos(np.pi * lr_ratio)) - self.muon.param_groups[0]['lr'] = learning_rate - self.adam.param_groups[0]['lr'] = learning_rate + self.optimizer.param_groups[0]['lr'] = learning_rate + #self.muon.param_groups[0]['lr'] = learning_rate + #self.adam.param_groups[0]['lr'] = learning_rate num_minibatches = config['num_minibatches'] for mb in range(num_minibatches): @@ -388,10 +410,12 @@ def train(self): loss.backward() if (mb + 1) % self.accumulate_minibatches == 0: torch.nn.utils.clip_grad_norm_(self.policy.parameters(), config['max_grad_norm']) - self.muon.step() - self.adam.step() - self.muon.zero_grad() - self.adam.zero_grad() + self.optimizer.step() + self.optimizer.zero_grad() + #self.muon.step() + #self.adam.step() + #self.muon.zero_grad() + #self.adam.zero_grad() # Reprioritize experience profile('train_misc', epoch) @@ -438,7 +462,7 @@ def mean_and_log(self): 'agent_steps': agent_steps, 'uptime': time.time() - self.start_time, 'epoch': int(dist_sum(self.epoch, device)), - 'learning_rate': self.adam.param_groups[0]["lr"], + #'learning_rate': self.adam.param_groups[0]["lr"], **{f'environment/{k}': v for k, v in self.stats.items()}, **{f'losses/{k}': v for k, v in self.losses.items()}, **{f'performance/{k}': v['elapsed'] for k, v in self.profile}, @@ -486,8 +510,8 @@ def save_checkpoint(self): 
torch.save(self.uncompiled_policy.state_dict(), model_path) state = { - 'adam_state_dict': self.adam.state_dict(), - 'muon_state_dict': self.muon.state_dict(), + #'adam_state_dict': self.adam.state_dict(), + #'muon_state_dict': self.muon.state_dict(), 'global_step': self.global_step, 'agent_step': self.global_step, 'update': self.epoch, From 86bc81c886b1c5a57e4d553b8e7a39370f3a514b Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Thu, 6 Nov 2025 23:45:46 +0000 Subject: [PATCH 099/188] Initial mingru --- pufferlib/config/default.ini | 1 + pufferlib/config/ocean/squared.ini | 4 +- pufferlib/extensions/pufferlib.cpp | 219 ++++++++++++++++++++++++++--- pufferlib/models.py | 9 +- pufferlib/pufferl.py | 15 +- 5 files changed, 216 insertions(+), 32 deletions(-) diff --git a/pufferlib/config/default.ini b/pufferlib/config/default.ini index 41098da92..015e4f9de 100644 --- a/pufferlib/config/default.ini +++ b/pufferlib/config/default.ini @@ -67,6 +67,7 @@ goal = maximize downsample = 5 use_gpu = True prune_pareto = True +max_cost = 3600 [sweep.train.total_timesteps] distribution = log_normal diff --git a/pufferlib/config/ocean/squared.ini b/pufferlib/config/ocean/squared.ini index d6f8e7ce7..0be91f7f2 100644 --- a/pufferlib/config/ocean/squared.ini +++ b/pufferlib/config/ocean/squared.ini @@ -1,7 +1,7 @@ [base] package = ocean env_name = puffer_squared -policy_name = Policy +policy_name = MinGRU rnn_name = Recurrent [vec] @@ -9,6 +9,8 @@ num_envs = 1 [policy] hidden_size = 128 +num_layers = 1 +expand = 2 [env] num_envs = 4096 diff --git a/pufferlib/extensions/pufferlib.cpp b/pufferlib/extensions/pufferlib.cpp index 65f8fca73..3c93382cf 100644 --- a/pufferlib/extensions/pufferlib.cpp +++ b/pufferlib/extensions/pufferlib.cpp @@ -132,6 +132,186 @@ struct ShareableLSTMCell : public torch::nn::LSTMCellImpl { } }; +class MinGRULayer : public torch::nn::Module { +private: + int64_t dim; + torch::nn::Linear to_hidden_and_gate{nullptr}; + torch::nn::Linear to_out{nullptr}; + 
+public: + int64_t expansion_factor; + MinGRULayer(int64_t dim, int64_t expansion_factor = 1.) + : dim(dim), expansion_factor(expansion_factor) { + + int dim_inner = int(dim * expansion_factor); + to_hidden_and_gate = register_module("to_hidden_and_gate", + torch::nn::Linear(torch::nn::LinearOptions(dim, 2*dim_inner).bias(false))); + torch::nn::init::orthogonal_(to_hidden_and_gate->weight); + + if (expansion_factor != 1.) { + to_out = register_module("to_out", + torch::nn::Linear(torch::nn::LinearOptions(dim*expansion_factor, dim).bias(false))); + torch::nn::init::orthogonal_(to_out->weight); + } + } + + std::tuple forward(torch::Tensor x, torch::Tensor state = torch::Tensor()) { + TORCH_CHECK(x.dim() == 3, "x must be [B, seq, input_size]"); + TORCH_CHECK(state.dim() == 3, "state must be [B, seq, hidden_size]"); + TORCH_CHECK(x.size(0) == state.size(0), "x and state must have the same batch size"); + + auto seq_len = x.size(1); + auto output = to_hidden_and_gate->forward(x); + auto chunks = output.chunk(2, 2); + auto hidden = chunks[0]; + auto gate = chunks[1]; + + torch::Tensor out; + torch::Tensor next_prev_hidden; + if (seq_len == 1) { + hidden = torch::where(hidden >= 0, hidden + 0.5, hidden.sigmoid()); + gate = gate.sigmoid(); + out = torch::lerp(state, hidden, gate); + next_prev_hidden = out; + } else { + auto log_coeffs = -torch::nn::functional::softplus(gate); + auto log_z = -torch::nn::functional::softplus(-gate); + auto log_tilde_h = torch::where(hidden >= 0, + (torch::nn::functional::relu(hidden) + 0.5).log(), + -torch::nn::functional::softplus(-hidden)); + auto log_values = log_z + log_tilde_h; + log_values = torch::cat({state.log(), log_values}, 1); + log_coeffs = torch::pad(log_coeffs, {0, 0, 1, 0}); + + // Heinsen associative scan + auto a_star = log_coeffs.cumsum(1); + auto log_h0_plus_b_star = (log_values - a_star).logcumsumexp(1); + auto log_h = a_star + log_h0_plus_b_star; + out = log_h.exp(); + + out = out.narrow(1, out.size(1) - seq_len, 
seq_len); + next_prev_hidden = out.narrow(1, out.size(1) - 1, 1); + } + + out = to_out->forward(out); + return std::make_tuple(out, next_prev_hidden); + } +}; + + +class PolicyMinGRU : public torch::nn::Module { +private: + int64_t input_size_; + int64_t hidden_size_; + int64_t num_atns_; + torch::nn::Sequential encoder{nullptr}; + torch::nn::Linear decoder{nullptr}; + torch::nn::Linear value{nullptr}; + std::shared_ptr mingru{nullptr}; + +public: + PolicyMinGRU(int64_t input_size, int64_t num_atns, int64_t hidden_size = 128) + : input_size_(input_size), hidden_size_(hidden_size), num_atns_(num_atns) { + encoder = register_module("encoder", torch::nn::Sequential( + torch::nn::Linear(input_size_, hidden_size_), + torch::nn::GELU() + )); + auto encoder_linear = (*encoder)[0]->as(); + torch::nn::init::orthogonal_(encoder_linear->weight, std::sqrt(2.0)); + torch::nn::init::constant_(encoder_linear->bias, 0.0); + + decoder = register_module("decoder", torch::nn::Linear(hidden_size_, num_atns_)); + torch::nn::init::orthogonal_(decoder->weight, 0.01); + torch::nn::init::constant_(decoder->bias, 0.0); + + value = register_module("value", torch::nn::Linear(hidden_size_, 1)); + torch::nn::init::orthogonal_(value->weight, 1.0); + torch::nn::init::constant_(value->bias, 0.0); + + mingru = register_module("mingru", std::make_shared(hidden_size_, 2)); + } + + torch::Tensor initial_state(int64_t batch_size, torch::Device device) { + return torch::zeros( + {1, batch_size, hidden_size_}, + torch::dtype(torch::kFloat32).device(device) + ); + } + + std::tuple forward( + torch::Tensor observations, torch::Tensor state) { + int64_t B = observations.size(0); + + // Ensure flat input: [B, input_size] + TORCH_CHECK(observations.dim() == 2 && observations.size(1) == input_size_, + "Observations must be [B, input_size]"); + TORCH_CHECK(state.dim() == 2 && state.size(0) == B && state.size(1) == hidden_size_*mingru->expansion_factor, + "state must be [1, B, hidden_size]"); + + auto hidden = 
encoder->forward(observations); + + hidden = hidden.unsqueeze(1); + state = state.unsqueeze(1); // change to 2 for multi-layer + + std::tuple mingru_out; + mingru_out = mingru->forward(hidden, state); + auto hidden_out = std::get<0>(mingru_out); + auto state_out = std::get<1>(mingru_out); + + hidden_out = hidden_out.squeeze(1); + state_out = state_out.squeeze(1); // change to 2 for multi-layer + + auto logits = decoder->forward(hidden_out); + auto values = value->forward(hidden_out); + + return {logits, values, state_out}; + } + + std::tuple forward_train( + torch::Tensor observations, torch::Tensor state) { + + auto x = observations; + auto x_shape = x.sizes(); + + // Expecting [B, TT, input_size] or [B, input_size] + TORCH_CHECK((x.dim() == 2 || x.dim() == 3), + "Observations must be [B, input_size] or [B, TT, input_size]"); + TORCH_CHECK(x.size(-1) == input_size_, + "Last dimension of observations must match input_size"); + + int64_t B = x_shape[0]; + int64_t TT = (x.dim() == 3) ? x_shape[1] : 1; + + TORCH_CHECK(state.dim() == 3 && state.size(0) == B && state.size(2) == hidden_size_*mingru->expansion_factor, + "state must be [B, seq, hidden_size]"); + + // Flatten time steps if needed + if (x.dim() == 3) { + x = x.reshape({B * TT, input_size_}); + } else { + TT = 1; + } + + auto hidden = encoder->forward(x); + + hidden = hidden.reshape({B, TT, hidden_size_}); + + std::tuple mingru_out; + mingru_out = mingru->forward(hidden, state); + hidden = std::get<0>(mingru_out); + + auto flat_hidden = hidden.reshape({-1, hidden_size_}); + auto logits = decoder->forward(flat_hidden); + auto values = value->forward(flat_hidden); + + logits = logits.reshape({B, TT, num_atns_}); + values = values.reshape({B, TT, 1}); + + return {logits, values}; + } +}; + + class PolicyLSTM : public torch::nn::Module { private: int64_t input_size_; @@ -141,7 +321,6 @@ class PolicyLSTM : public torch::nn::Module { torch::nn::Linear decoder{nullptr}; torch::nn::Linear value{nullptr}; 
torch::nn::LSTM lstm{nullptr}; - //torch::nn::LSTMCell cell{nullptr}; std::shared_ptr cell{nullptr}; public: @@ -170,8 +349,6 @@ class PolicyLSTM : public torch::nn::Module { lstm->named_parameters()["bias_ih_l0"].data().zero_(); lstm->named_parameters()["bias_hh_l0"].data().zero_(); - // ... (your existing lstm creation and init) - cell = register_module("cell", std::make_shared(torch::nn::LSTMCellOptions(hidden_size_, hidden_size_))); cell->set_shared_weights(lstm->named_parameters()["weight_ih_l0"], lstm->named_parameters()["weight_hh_l0"], @@ -298,7 +475,7 @@ void sync_fp16_fp32(pufferlib::PolicyLSTM* policy_16, pufferlib::PolicyLSTM* pol } typedef struct { - PolicyLSTM* policy; + PolicyMinGRU* policy; torch::optim::Adam* optimizer; double lr; int64_t max_epochs; @@ -326,7 +503,7 @@ std::unique_ptr create_pufferl(int64_t input_size, // BF16 reduction (if using bfloat16) torch::globalContext().setAllowBF16ReductionCuBLAS(true); - auto policy = new PolicyLSTM(input_size, num_atns, hidden_size); + auto policy = new PolicyMinGRU(input_size, num_atns, hidden_size); policy->to(torch::kCUDA); policy->to(torch::kBFloat16); @@ -341,7 +518,7 @@ std::unique_ptr create_pufferl(int64_t input_size, } // Updated compiled_evaluate -std::tuple compiled_evaluate( +torch::Tensor compiled_evaluate( pybind11::object pufferl_obj, torch::Tensor envs_tensor, torch::Tensor indices_tensor, @@ -349,8 +526,8 @@ std::tuple compiled_evaluate( torch::Tensor actions, torch::Tensor rewards, torch::Tensor terminals, - torch::Tensor lstm_h, - torch::Tensor lstm_c, + //torch::Tensor lstm_h, + torch::Tensor state, torch::Tensor obs_buffer, torch::Tensor act_buffer, torch::Tensor logprob_buffer, @@ -365,13 +542,11 @@ std::tuple compiled_evaluate( auto& pufferl = pufferl_obj.cast(); auto& policy = pufferl.policy; - lstm_h = lstm_h.to(torch::kBFloat16); - lstm_c = lstm_c.to(torch::kBFloat16); + state = state.to(torch::kBFloat16); for (int64_t i = 0; i < horizon; ++i) { - auto [logits, value, 
lstm_h_out, lstm_c_out] = policy->forward(obs.to(torch::kBFloat16), lstm_h, lstm_c); - lstm_h = lstm_h_out; - lstm_c = lstm_c_out; + auto [logits, value, state_out] = policy->forward(obs.to(torch::kBFloat16), state); + state = state_out; auto logprobs = torch::log_softmax(logits, 1); auto action = at::multinomial(logprobs.exp(), 1, true).squeeze(1); @@ -393,9 +568,10 @@ std::tuple compiled_evaluate( rewards.clamp_(-1.0f, 1.0f); } - return std::make_tuple(lstm_h, lstm_c); + return state; } +/* std::tuple evaluate_step( pybind11::object pufferl_obj, torch::Tensor envs_tensor, @@ -438,6 +614,7 @@ std::tuple evaluate_step( actions.copy_(action); return std::make_tuple(lstm_h, lstm_c); } +*/ pybind11::dict compiled_train( pybind11::object pufferl_obj, @@ -532,14 +709,13 @@ pybind11::dict compiled_train( // HARDCODED LSTM SIZE 128 // Initial LSTM states (zero or none) - torch::Tensor mb_lstm_h = torch::zeros( - {1, minibatch_segments, 128}, + torch::Tensor mb_state= torch::zeros( + {minibatch_segments, 1, 256}, torch::kBFloat16 ).to(device); - torch::Tensor mb_lstm_c = torch::zeros_like(mb_lstm_h); // Forward pass - auto [logits, newvalue] = policy->forward_train(mb_obs.to(torch::kBFloat16), mb_lstm_h, mb_lstm_c); + auto [logits, newvalue] = policy->forward_train(mb_obs.to(torch::kBFloat16), mb_state); // Flatten for action lookup auto flat_logits = logits.reshape({-1, logits.size(-1)}); @@ -643,7 +819,7 @@ PYBIND11_MODULE(_C, m) { m.def("reset_environments", &reset_environments_cuda); m.def("log_environments", &log_environments_cuda); m.def("compiled_evaluate", &compiled_evaluate); - m.def("evaluate_step", &evaluate_step); + //m.def("evaluate_step", &evaluate_step); m.def("compiled_train", &compiled_train); py::class_(m, "Log") @@ -662,5 +838,10 @@ PYBIND11_MODULE(_C, m) { cls.def(py::init()); cls.def("forward", &pufferlib::PolicyLSTM::forward); cls.def("forward_train", &pufferlib::PolicyLSTM::forward_train); + + py::class_, torch::nn::Module> cls2(m, "PolicyMinGRU"); + 
cls2.def(py::init()); + cls2.def("forward", &pufferlib::PolicyMinGRU::forward); + cls2.def("forward_train", &pufferlib::PolicyMinGRU::forward_train); } } diff --git a/pufferlib/models.py b/pufferlib/models.py index d40f36394..4e25e0732 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -63,7 +63,6 @@ def forward(self, x, prev_hidden = None): out = torch.lerp(prev_hidden, hidden, gate) if exists(prev_hidden) else (hidden * gate) else: # parallel - log_coeffs = -F.softplus(gate) log_z = -F.softplus(-gate) @@ -226,19 +225,19 @@ def forward(self, x): return logits, values class MinGRU(nn.Module): - def __init__(self, env, hidden_size=128, num_layers=1, **kwargs): + def __init__(self, env, hidden_size=128, num_layers=1, expand=1, **kwargs): super().__init__() self.hidden_size = hidden_size self.input_size = hidden_size self.obs_shape = env.single_observation_space.shape self.encoder = DefaultEncoder(env, hidden_size) self.decoder = DefaultDecoder(env, hidden_size) - + self.expand = expand self.num_layers = num_layers - self.mingru = nn.ModuleList([MinGRULayer(hidden_size) for _ in range(num_layers)]) + self.mingru = nn.ModuleList([MinGRULayer(hidden_size, expansion_factor=expand) for _ in range(num_layers)]) def initial_state(self, batch_size, device): - state = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device) + state = torch.zeros(self.num_layers, batch_size, self.expand*self.hidden_size, device=device) return (state,) def forward_eval(self, x, state): diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index e7471c0af..9635e8f73 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -238,8 +238,8 @@ def evaluate(self): h = 128 #h = self.policy.hidden_size - lstm_h = torch.zeros((n, h), device=device) - lstm_c = torch.zeros((n, h), device=device) + state = torch.zeros((n, 2*h), device=device) + ''' for t in range(self.config['bptt_horizon']): @@ -265,7 +265,7 @@ def evaluate(self): self.vecenv.step(self.actions[:, t]) 
''' - lstm_h, lstm_c = _C.compiled_evaluate( + state = _C.compiled_evaluate( self.pufferl_cpp, self.vecenv.env, self.vecenv.indices, @@ -273,8 +273,7 @@ def evaluate(self): self.vecenv.actions, self.vecenv.rewards, self.vecenv.terminals, - lstm_h, - lstm_c, + state, self.observations, self.actions, self.logprobs, @@ -869,7 +868,7 @@ def check(env_name): print('Check passed') -def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=True): +def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=True, should_stop_early=None): args = args or load_config(env_name) # Assume TorchRun DDP is used if LOCAL_RANK is set @@ -905,7 +904,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=Tr elif args['wandb']: logger = WandbLogger(args) - train_config = dict(**args['train'], env=env_name) + train_config = dict(**args['train'])#, env=env_name) #pufferl = PuffeRL(train_config, vecenv, policy, logger, verbose) pufferl = PuffeRL(train_config, logger, verbose) pufferl.logger.init(args) @@ -934,6 +933,8 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=Tr # Final eval. You can reset the env here, but depending on # your env, this can skew data (i.e. you only collect the shortest # rollouts within a fixed number of epochs) + uptime = pufferl.uptime + agent_steps = pufferl.global_step for i in range(128): # Run eval for at least 32, but put a hard stop at 128. 
stats = pufferl.evaluate() if i >= 32 and stats: From 304990ccdb8bc3b50b9cb60784bb7ffab38b92ec Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 7 Nov 2025 13:17:45 +0000 Subject: [PATCH 100/188] Update configs for sweeping --- pufferlib/config/ocean/breakout.ini | 32 ++++++++++++++--------------- pufferlib/config/ocean/tetris.ini | 19 ++++++++--------- pufferlib/models.py | 15 ++++++++------ pufferlib/python_pufferl.py | 6 ++++-- 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index c4e848625..2a5139cee 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -27,15 +27,15 @@ continuous = 0 [policy] hidden_size = 128 -num_layers = 3 +num_layers = 2 +expansion_factor = 2 d_state = 32 d_conv = 4 -expand = 2 [sweep.policy.hidden_size] distribution = uniform_pow2 min = 16 -max = 256 +max = 512 mean = 128 scale = auto @@ -46,19 +46,19 @@ max = 4 mean = 2 scale = auto -[sweep.policy.d_state] -distribution = uniform_pow2 -min = 8 -max = 128 -mean = 32 -scale = auto +#[sweep.policy.d_state] +#distribution = uniform_pow2 +#min = 32 +#max = 128 +#mean = 32 +#scale = auto -[sweep.policy.d_conv] -distribution = int_uniform -min = 1 -max = 4 -mean = 2 -scale = auto +#[sweep.policy.d_conv] +#distribution = int_uniform +#min = 1 +#max = 4 +#mean = 2 +#scale = auto [sweep.policy.expand] distribution = int_uniform @@ -111,7 +111,7 @@ vtrace_rho_clip = 1.1777184656786774 [sweep] downsample = 10 -max_cost = 240 +max_cost = 300 [sweep.env.num_envs] distribution = uniform_pow2 diff --git a/pufferlib/config/ocean/tetris.ini b/pufferlib/config/ocean/tetris.ini index 15e69b07b..6d53031d4 100644 --- a/pufferlib/config/ocean/tetris.ini +++ b/pufferlib/config/ocean/tetris.ini @@ -1,7 +1,7 @@ [base] package = ocean env_name = puffer_tetris -policy_name = Policy +policy_name = MinGRU rnn_name = Recurrent [vec] @@ -19,8 +19,8 @@ n_noise_obs = 0 [policy] 
hidden_size = 256 num_layers = 1 -d_state = 32 -d_conv = 4 +#d_state = 32 +#d_conv = 4 expand = 2 [sweep.policy.hidden_size] @@ -44,12 +44,12 @@ max = 128 mean = 32 scale = auto -[sweep.policy.d_conv] -distribution = int_uniform -min = 1 -max = 4 -mean = 2 -scale = auto +#[sweep.policy.d_conv] +#distribution = int_uniform +#min = 1 +#max = 4 +#mean = 2 +#scale = auto [sweep.policy.expand] distribution = int_uniform @@ -81,7 +81,6 @@ vf_coef = 4.74 vtrace_c_clip = 1.29 vtrace_rho_clip = 0.70 - [sweep] metric = score goal = maximize diff --git a/pufferlib/models.py b/pufferlib/models.py index d40f36394..c85699a32 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -40,7 +40,7 @@ def log_g(x): # they enforce the hidden states to be positive class MinGRULayer(Module): - def __init__(self, dim, expansion_factor = 1., proj_out = None): + def __init__(self, dim, expansion_factor=1., proj_out = None): super().__init__() dim_inner = int(dim * expansion_factor) @@ -226,19 +226,20 @@ def forward(self, x): return logits, values class MinGRU(nn.Module): - def __init__(self, env, hidden_size=128, num_layers=1, **kwargs): + def __init__(self, env, hidden_size=128, num_layers=1, expansion_factor=2, **kwargs): super().__init__() self.hidden_size = hidden_size self.input_size = hidden_size + self.expansion_factor = expansion_factor self.obs_shape = env.single_observation_space.shape self.encoder = DefaultEncoder(env, hidden_size) self.decoder = DefaultDecoder(env, hidden_size) self.num_layers = num_layers - self.mingru = nn.ModuleList([MinGRULayer(hidden_size) for _ in range(num_layers)]) + self.mingru = nn.ModuleList([MinGRULayer(hidden_size, expansion_factor) for _ in range(num_layers)]) def initial_state(self, batch_size, device): - state = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device) + state = torch.zeros(self.num_layers, batch_size, self.hidden_size*self.expansion_factor, device=device) return (state,) def forward_eval(self, x, state): @@ 
-247,11 +248,13 @@ def forward_eval(self, x, state): h = self.encoder(x) h = h.unsqueeze(1) state = state.unsqueeze(2) + state_out = [] for i in range(self.num_layers): - h, state[i] = self.mingru[i](h, state[i]) + h, s = self.mingru[i](h, state[i]) + state_out.append(s) h = h.squeeze(1) - state = state.squeeze(2) + state = torch.stack(state_out, 0).squeeze(2) logits, values = self.decoder(h) return logits, values, (state,) diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index ddc1da423..506d60adf 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -52,6 +52,7 @@ class PuffeRL: def __init__(self, config, vecenv, policy, logger=None, verbose=True): # Backend perf optimization + torch.set_float32_matmul_precision('high') # Old torch.backends.cudnn.conv.fp32_precision = 'tf32' torch.backends.cudnn.deterministic = config['torch_deterministic'] torch.backends.cudnn.benchmark = True @@ -951,9 +952,10 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None, verbose=Tr stats = {} uptime = pufferl.uptime agent_steps = pufferl.global_step - while i < 128 or not stats: + for i in range(128): # Run eval for at least 32, but put a hard stop at 128. 
stats = pufferl.evaluate() - i += 1 + if i >= 32 and stats: + break logs = pufferl.mean_and_log() logs['uptime'] = uptime From 01071cbc5fb6f65ea26cda396b4ae5f3ae66af83 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Fri, 7 Nov 2025 13:33:06 +0000 Subject: [PATCH 101/188] Adam for now so we can run --- pufferlib/config/ocean/breakout.ini | 2 +- pufferlib/models.py | 2 +- pufferlib/python_pufferl.py | 22 ++++++++++++++-------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pufferlib/config/ocean/breakout.ini b/pufferlib/config/ocean/breakout.ini index 2a5139cee..ceeee688f 100644 --- a/pufferlib/config/ocean/breakout.ini +++ b/pufferlib/config/ocean/breakout.ini @@ -60,7 +60,7 @@ scale = auto #mean = 2 #scale = auto -[sweep.policy.expand] +[sweep.policy.expansion_factor] distribution = int_uniform min = 1 max = 2 diff --git a/pufferlib/models.py b/pufferlib/models.py index dc251bec2..458f949ae 100644 --- a/pufferlib/models.py +++ b/pufferlib/models.py @@ -233,7 +233,7 @@ def __init__(self, env, hidden_size=128, num_layers=1, expansion_factor=2, **kwa self.obs_shape = env.single_observation_space.shape self.encoder = DefaultEncoder(env, hidden_size) self.decoder = DefaultDecoder(env, hidden_size) - self.expand = expand + self.expansion_factor = expansion_factor self.num_layers = num_layers self.mingru = nn.ModuleList([MinGRULayer(hidden_size, expansion_factor) for _ in range(num_layers)]) diff --git a/pufferlib/python_pufferl.py b/pufferlib/python_pufferl.py index 506d60adf..5d4718f1a 100644 --- a/pufferlib/python_pufferl.py +++ b/pufferlib/python_pufferl.py @@ -132,7 +132,7 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): import heavyball from heavyball import ForeachMuon warnings.filterwarnings(action='ignore', category=UserWarning, module=r'heavyball.*') - heavyball.utils.compile_mode = "default" + heavyball.utils.compile_mode = "reduce-overhead" # # optionally a little bit better/faster alternative to newtonschulz 
iteration # import heavyball.utils @@ -140,15 +140,21 @@ def __init__(self, config, vecenv, policy, logger=None, verbose=True): # heavyball_momentum=True introduced in heavyball 2.1.1 # recovers heavyball-1.7.2 behaviour - previously swept hyperparameters work well - self.optimizer = ForeachMuon( + self.optimizer = torch.optim.Adam( self.policy.parameters(), lr=config['learning_rate'], betas=(config['adam_beta1'], config['adam_beta2']), eps=config['adam_eps'], - heavyball_momentum=True, ) - + ''' + self.optimizer = ForeachMuon( + self.policy.parameters(), + lr=config['learning_rate'], + betas=(config['adam_beta1'], config['adam_beta2']), + eps=config['adam_eps'], + #heavyball_momentum=True, + ) self.muon = torch.optim.Muon( [e for e in self.policy.parameters() if e.dim() == 2], lr=config['learning_rate'], @@ -711,8 +717,8 @@ def __call__(self, name, epoch, nest=False): if epoch % self.frequency != 0: return - if torch.cuda.is_available(): - torch.cuda.synchronize() + #if torch.cuda.is_available(): + # torch.cuda.synchronize() tick = time.time() if len(self.stack) != 0 and not nest: @@ -728,8 +734,8 @@ def pop(self, end): profile['delta'] += delta def end(self): - if torch.cuda.is_available(): - torch.cuda.synchronize() + #if torch.cuda.is_available(): + # torch.cuda.synchronize() end = time.time() for i in range(len(self.stack)): From 84636a7cf56eeaed77b7fa27f7ff4e76d1065b96 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 8 Nov 2025 14:03:10 +0000 Subject: [PATCH 102/188] Initial kern --- pufferlib/extensions/cuda/kernels.cu | 152 +++++++++++++++ pufferlib/extensions/cuda/modules.cu | 99 ++++++++++ pufferlib/extensions/cuda/ops.cuh | 88 +++++++++ pufferlib/extensions/pufferlib.cpp | 268 ++++++++++++++++++++++----- setup.py | 11 +- 5 files changed, 574 insertions(+), 44 deletions(-) create mode 100644 pufferlib/extensions/cuda/kernels.cu create mode 100644 pufferlib/extensions/cuda/modules.cu create mode 100644 pufferlib/extensions/cuda/ops.cuh diff --git 
a/pufferlib/extensions/cuda/kernels.cu b/pufferlib/extensions/cuda/kernels.cu new file mode 100644 index 000000000..3572732bf --- /dev/null +++ b/pufferlib/extensions/cuda/kernels.cu @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include "ops.cuh" +#include +#include + +#include + +#define BLOCK_SIZE 256 +inline int grid_size(int N) { + return (N + BLOCK_SIZE - 1) / BLOCK_SIZE; +} + +// If you can get this to work, go ahead. I tried. +// NVCC won't parse templated types in kernel launches +/* +template