Emerge-Lab · mpragnay · Nov 9, 2025 · Nov 9, 2025 · Nov 11, 2025 · Nov 11, 2025
diff --git a/README.md b/README.md
@@ -117,7 +117,7 @@ The `-s` flag sets up a virtual screen at 1280x720 resolution with 24-bit color
 
 ### Distributional realism
 
-We provide a PufferDrive implementation of the [Waymo Open Sim Agents Challenge (WOSAC)](https://waymo.com/open/challenges/2025/sim-agents/) for fast, easy evaluation of how well your trained agent matches distributional properties of human behavior. See details [here](https://github.com/Emerge-Lab/PufferDrive/main/pufferlib/ocean/benchmark).
+We provide a PufferDrive implementation of the [Waymo Open Sim Agents Challenge (WOSAC)](https://waymo.com/open/challenges/2025/sim-agents/) for fast, easy evaluation of how well your trained agent matches distributional properties of human behavior. See details [here](https://github.com/Emerge-Lab/PufferDrive/tree/main/pufferlib/ocean/benchmark).
 
 WOSAC evaluation with random policy:
 ```bash

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
@@ -20,6 +20,8 @@ hidden_size = 256
 
 [env]
 num_agents = 1024
+; If True, we control non-vehicle entities as well (e.g., pedestrians, cyclists)
+control_non_vehicles = False
 ; Options: discrete, continuous
 action_type = discrete
 ; Options: classic, jerk
@@ -44,10 +46,16 @@ resample_frequency = 910
 num_maps = 1000
 ; Determines which step of the trajectory to initialize the agents at upon reset
 init_steps = 0
-; Options: "control_vehicles", "control_agents", "control_tracks_to_predict", "control_sdc_only"
+; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only"
 control_mode = "control_vehicles"
-; Options: "created_all_valid", "create_only_controlled"
-init_mode = "create_all_valid"
+; Options: "create_all_valid", "create_only_controlled", "dynamic_no_agents"
+init_mode = "dynamic_no_agents"
+; Only for dynamic_no_agents init_mode
+num_agents_per_world = 32
+vehicle_width = 2.0
+vehicle_length = 4.5
+vehicle_height = 1.8
+goal_curriculum = 30.0
 
 [train]
 total_timesteps = 2_000_000_000
@@ -87,7 +95,7 @@ show_grid = False
 show_lasers = False
 ; Display human xy logs in the background
 show_human_logs = True
-; Options: str to path (e.g., "resources/drive/binaries/map_001.bin"), None
+; Options: List[str to path], str to path (e.g., "resources/drive/binaries/map_001.bin"), None
 render_map = none
 
 [eval]
@@ -99,7 +107,7 @@ wosac_realism_eval = False
 wosac_num_rollouts = 32  # Number of policy rollouts per scene
 wosac_init_steps = 10 # When to start the simulation
 wosac_num_agents = 256  # Total number of WOSAC agents to evaluate
-wosac_control_mode = "control_tracks_to_predict"  # Control the tracks to predict
+wosac_control_mode = "control_wosac"  # Control the tracks to predict
 wosac_init_mode = "create_all_valid"  # Initialize from the tracks to predict
 wosac_goal_behavior = 2  # Stop when reaching the goal
 wosac_goal_radius = 2.0 # Can shrink goal radius for WOSAC evaluation
@@ -145,3 +153,9 @@ min = 0.0
 max = 1.0
 mean = 0.5
 scale = auto
+
+[controlled_exp.train.learning_rate]
+values = [0.001, 0.003, 0.01]
+
+[controlled_exp.train.ent_coef]
+values = [0.01, 0.005]
diff --git a/pufferlib/ocean/benchmark/README.md b/pufferlib/ocean/benchmark/README.md
@@ -60,7 +60,6 @@ Steps [for every scene]:
         Linear acceleration: 0.4658
         Angular speed: 0.5543
         Angular acceleration: 0.6589
-
         Kinematics realism score: 0.5607
         ```
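-    These scores go to 1.0 if we use the time-dependent estimator, execpt for the smoothing factor that is used to avoid bins with 0 probability.
+    These scores go to 1.0 if we use the time-dependent estimator, except for the smoothing factor that is used to avoid bins with 0 probability.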

diff --git a/pufferlib/ocean/benchmark/estimators.py b/pufferlib/ocean/benchmark/estimators.py
@@ -13,7 +13,7 @@ def histogram_estimate(
     min_val: float,
     max_val: float,
     num_bins: int,
-    additive_smoothing: float = 0.1,
+    additive_smoothing: float,
 ) -> np.ndarray:
     """Computes log-likelihoods of samples based on histograms.
 
@@ -68,7 +68,7 @@ def log_likelihood_estimate_timeseries(
     min_val: float,
     max_val: float,
     num_bins: int,
-    additive_smoothing: float = 0.1,
+    additive_smoothing: float,
     treat_timesteps_independently: bool = True,
     sanity_check: bool = False,
     plot_agent_idx: int = 0,
@@ -120,6 +120,86 @@ def log_likelihood_estimate_timeseries(
     return log_probs
 
 
+def bernoulli_estimate(
+    log_samples: np.ndarray,
+    sim_samples: np.ndarray,
+    additive_smoothing: float,
+) -> np.ndarray:
+    """Computes log probabilities of boolean samples by delegating to histogram_estimate.
+
+    Args:
+        log_samples: Boolean array of shape (n_agents, sample_size); ValueError if dtype is not bool
+        sim_samples: Boolean array of shape (n_agents, sample_size); ValueError if dtype is not bool
+        additive_smoothing: Pseudocount for Laplace smoothing, forwarded unchanged to histogram_estimate
+
+    Returns:
+        Shape (n_agents, sample_size) - log-likelihood of each log sample
+    """
+    if log_samples.dtype != bool:  # strict check: inputs are never cast implicitly here
+        raise ValueError("log_samples must be boolean array for Bernoulli estimate")
+    if sim_samples.dtype != bool:
+        raise ValueError("sim_samples must be boolean array for Bernoulli estimate")
+
+    return histogram_estimate(
+        log_samples.astype(float),  # False -> 0.0, True -> 1.0
+        sim_samples.astype(float),
+        min_val=-0.5,
+        max_val=1.5,
+        num_bins=2,  # presumably one bin each for 0.0 and 1.0 over [-0.5, 1.5]; assumes uniform bin edges
+        additive_smoothing=additive_smoothing,
+    )
+
+
+def log_likelihood_estimate_scenario_level(
+    log_values: np.ndarray,
+    sim_values: np.ndarray,
+    min_val: float,
+    max_val: float,
+    num_bins: int,
+    additive_smoothing: float | None = None,
+    use_bernoulli: bool = False,
+) -> np.ndarray:
+    """Computes log-likelihood estimates for scenario-level features (no time dimension).
+
+    Args:
+        log_values: Shape (n_agents,); ValueError if not 1D
+        sim_values: Shape (n_agents, n_rollouts); ValueError if not 2D
+        min_val: Minimum value for histogram bins (ignored if use_bernoulli=True)
+        max_val: Maximum value for histogram bins (ignored if use_bernoulli=True)
+        num_bins: Number of histogram bins (ignored if use_bernoulli=True)
+        additive_smoothing: Pseudocount for Laplace smoothing (ignored if use_bernoulli=True, which uses a fixed 0.001)
+        use_bernoulli: If True, cast both inputs to bool and use the Bernoulli estimator
+
+    Returns:
+        Shape (n_agents,) - log-likelihood of each log feature
+    """
+    if log_values.ndim != 1:
+        raise ValueError(f"log_values must be 1D, got shape {log_values.shape}")
+    if sim_values.ndim != 2:
+        raise ValueError(f"sim_values must be 2D, got shape {sim_values.shape}")
+
+    log_values_2d = log_values[:, np.newaxis]  # (n_agents,) -> (n_agents, 1): one log sample per agent
+    sim_values_2d = sim_values
+
+    if use_bernoulli:
+        log_likelihood_2d = bernoulli_estimate(
+            log_values_2d.astype(bool),  # cast satisfies bernoulli_estimate's bool dtype check
+            sim_values_2d.astype(bool),
+            additive_smoothing=0.001,  # fixed pseudocount; the additive_smoothing argument is not consulted
+        )
+    else:
+        log_likelihood_2d = histogram_estimate(
+            log_values_2d,
+            sim_values_2d,
+            min_val=min_val,
+            max_val=max_val,
+            num_bins=num_bins,
+            additive_smoothing=additive_smoothing,  # NOTE(review): default None is forwarded as-is; confirm callers always pass a value
+        )
+
+    return log_likelihood_2d[:, 0]  # keep column 0, the single log sample added per agent above
+
+
 def _plot_histogram_sanity_check(
     log_samples: np.ndarray,
     sim_samples: np.ndarray,