Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ The `-s` flag sets up a virtual screen at 1280x720 resolution with 24-bit color

### Distributional realism

We provide a PufferDrive implementation of the [Waymo Open Sim Agents Challenge (WOSAC)](https://waymo.com/open/challenges/2025/sim-agents/) for fast, easy evaluation of how well your trained agent matches distributional properties of human behavior. See details [here](https://github.com/Emerge-Lab/PufferDrive/main/pufferlib/ocean/benchmark).
We provide a PufferDrive implementation of the [Waymo Open Sim Agents Challenge (WOSAC)](https://waymo.com/open/challenges/2025/sim-agents/) for fast, easy evaluation of how well your trained agent matches distributional properties of human behavior. See details [here](https://github.com/Emerge-Lab/PufferDrive/tree/main/pufferlib/ocean/benchmark).

WOSAC evaluation with random policy:
```bash
Expand Down
24 changes: 19 additions & 5 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ hidden_size = 256

[env]
num_agents = 1024
; If True, we control non-vehicle entities as well (e.g., pedestrians, cyclists)
control_non_vehicles = False
; Options: discrete, continuous
action_type = discrete
; Options: classic, jerk
Expand All @@ -44,10 +46,16 @@ resample_frequency = 910
num_maps = 1000
; Determines which step of the trajectory to initialize the agents at upon reset
init_steps = 0
; Options: "control_vehicles", "control_agents", "control_tracks_to_predict", "control_sdc_only"
; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only"
control_mode = "control_vehicles"
; Options: "created_all_valid", "create_only_controlled"
init_mode = "create_all_valid"
; Options: "create_all_valid", "create_only_controlled", "dynamic_no_agents"
init_mode = "dynamic_no_agents"
; Only for dynamic_no_agents init_mode
num_agents_per_world = 32
vehicle_width = 2.0
vehicle_length = 4.5
vehicle_height = 1.8
goal_curriculum = 30.0

[train]
total_timesteps = 2_000_000_000
Expand Down Expand Up @@ -87,7 +95,7 @@ show_grid = False
show_lasers = False
; Display human xy logs in the background
show_human_logs = True
; Options: str to path (e.g., "resources/drive/binaries/map_001.bin"), None
; Options: List[str to path], str to path (e.g., "resources/drive/binaries/map_001.bin"), None
render_map = none

[eval]
Expand All @@ -99,7 +107,7 @@ wosac_realism_eval = False
wosac_num_rollouts = 32 # Number of policy rollouts per scene
wosac_init_steps = 10 # When to start the simulation
wosac_num_agents = 256 # Total number of WOSAC agents to evaluate
wosac_control_mode = "control_tracks_to_predict" # Control the tracks to predict
wosac_control_mode = "control_wosac" # Control the tracks to predict
wosac_init_mode = "create_all_valid" # Initialize from the tracks to predict
wosac_goal_behavior = 2 # Stop when reaching the goal
wosac_goal_radius = 2.0 # Can shrink goal radius for WOSAC evaluation
Expand Down Expand Up @@ -145,3 +153,9 @@ min = 0.0
max = 1.0
mean = 0.5
scale = auto

[controlled_exp.train.learning_rate]
values = [0.001, 0.003, 0.01]

[controlled_exp.train.ent_coef]
values = [0.01, 0.005]
1 change: 0 additions & 1 deletion pufferlib/ocean/benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ Steps [for every scene]:
Linear acceleration: 0.4658
Angular speed: 0.5543
Angular acceleration: 0.6589

Kinematics realism score: 0.5607
```
These scores go to 1.0 if we use the time-dependent estimator, except for the effect of the smoothing factor, which is used to avoid bins with 0 probability.
Expand Down
84 changes: 82 additions & 2 deletions pufferlib/ocean/benchmark/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def histogram_estimate(
min_val: float,
max_val: float,
num_bins: int,
additive_smoothing: float = 0.1,
additive_smoothing: float,
) -> np.ndarray:
"""Computes log-likelihoods of samples based on histograms.

Expand Down Expand Up @@ -68,7 +68,7 @@ def log_likelihood_estimate_timeseries(
min_val: float,
max_val: float,
num_bins: int,
additive_smoothing: float = 0.1,
additive_smoothing: float,
treat_timesteps_independently: bool = True,
sanity_check: bool = False,
plot_agent_idx: int = 0,
Expand Down Expand Up @@ -120,6 +120,86 @@ def log_likelihood_estimate_timeseries(
return log_probs


def bernoulli_estimate(
    log_samples: np.ndarray,
    sim_samples: np.ndarray,
    additive_smoothing: float,
) -> np.ndarray:
    """Estimate log-likelihoods of boolean samples via a two-bin histogram.

    The boolean inputs are cast to floats (False -> 0.0, True -> 1.0) and
    handed to ``histogram_estimate`` with two bins spanning [-0.5, 1.5], so
    that 0.0 and 1.0 land in separate bins.

    Args:
        log_samples: Boolean array of shape (n_agents, sample_size)
        sim_samples: Boolean array of shape (n_agents, sample_size)
        additive_smoothing: Pseudocount for Laplace smoothing, forwarded
            unchanged to ``histogram_estimate``

    Returns:
        Shape (n_agents, sample_size) - log-likelihood of each log sample

    Raises:
        ValueError: If either input array does not have boolean dtype.
    """
    # Reject non-boolean inputs up front; log samples are checked first.
    log_is_bool = log_samples.dtype == bool
    if not log_is_bool:
        raise ValueError("log_samples must be boolean array for Bernoulli estimate")
    sim_is_bool = sim_samples.dtype == bool
    if not sim_is_bool:
        raise ValueError("sim_samples must be boolean array for Bernoulli estimate")

    log_as_float = log_samples.astype(float)
    sim_as_float = sim_samples.astype(float)

    # Two bins over [-0.5, 1.5]: one centred on 0.0, one centred on 1.0.
    log_probs = histogram_estimate(
        log_as_float,
        sim_as_float,
        min_val=-0.5,
        max_val=1.5,
        num_bins=2,
        additive_smoothing=additive_smoothing,
    )
    return log_probs


def log_likelihood_estimate_scenario_level(
log_values: np.ndarray,
sim_values: np.ndarray,
min_val: float,
max_val: float,
num_bins: int,
additive_smoothing: float | None = None,
use_bernoulli: bool = False,
) -> np.ndarray:
"""Computes log-likelihood estimates for scenario-level features (no time dimension).

Args:
log_values: Shape (n_agents,)
sim_values: Shape (n_agents, n_rollouts)
min_val: Minimum value for histogram bins (ignored if use_bernoulli=True)
max_val: Maximum value for histogram bins (ignored if use_bernoulli=True)
num_bins: Number of histogram bins (ignored if use_bernoulli=True)
additive_smoothing: Pseudocount for Laplace smoothing
use_bernoulli: If True, use Bernoulli estimator for boolean features

Returns:
Shape (n_agents,) - log-likelihood of each log feature
"""
if log_values.ndim != 1:
raise ValueError(f"log_values must be 1D, got shape {log_values.shape}")
if sim_values.ndim != 2:
raise ValueError(f"sim_values must be 2D, got shape {sim_values.shape}")

log_values_2d = log_values[:, np.newaxis]
sim_values_2d = sim_values

if use_bernoulli:
log_likelihood_2d = bernoulli_estimate(
log_values_2d.astype(bool),
sim_values_2d.astype(bool),
additive_smoothing=0.001,
)
else:
log_likelihood_2d = histogram_estimate(
log_values_2d,
sim_values_2d,
min_val=min_val,
max_val=max_val,
num_bins=num_bins,
additive_smoothing=additive_smoothing,
)

return log_likelihood_2d[:, 0]


def _plot_histogram_sanity_check(
log_samples: np.ndarray,
sim_samples: np.ndarray,
Expand Down
Loading
Loading