From 6665fabf489841ce8f91e4d0aebec12c6b80f3bf Mon Sep 17 00:00:00 2001
From: amoon <amoon@mit.edu>
Date: Mon, 17 Jun 2024 15:53:14 -0400
Subject: [PATCH 1/2] removed postorier and update plot

---
 simulation/fit_pm.py        | 251 ++++++++++++++++++++----------------
 simulation/interact_tool.py | 234 ++++++++++++++-------------------
 simulation/test_hypo.py     | 139 ++++++++++----------
 3 files changed, 307 insertions(+), 317 deletions(-)

diff --git a/simulation/fit_pm.py b/simulation/fit_pm.py
index edb65db..499feb7 100644
--- a/simulation/fit_pm.py
+++ b/simulation/fit_pm.py
@@ -15,115 +15,130 @@
 
 def sample_profit_obs(t, em):
     """Return the profit observation based on the experiment index and product-market pair."""
-    return mu2profit(em['mu_b_r'].item(), em['mu_c_r'].item(), em['product'][t].item(), em['market'][t].item())
+    return mu2profit(em['mu_p_r'].item(), em['mu_m_r'].item(), em['product'][t].item(), em['market'][t].item())
 
 
-def decide_action(profit_obs, low_profit_b, high_profit_b):
-    """Decide the next action based on predicted and observed profit."""
+def decide_action(profit_obs, low_profit_b, high_profit_b, CR=.5):
+    """Decide the next action based on predicted and observed profit and cost considerations."""
 
-    if low_profit_b <= profit_obs <= high_profit_b:
-        return "pivot_product"
-    elif profit_obs < low_profit_b:
-        return "pivot_market"
-    elif profit_obs > high_profit_b:
-        return "scale"
+    if CR > 1:  #🐣 pivot_product cost > pivot_market cost
+        if profit_obs > high_profit_b:
+            return "scale"
+        elif low_profit_b <= profit_obs <= high_profit_b:
+            return "pivot_product"
+        else: 
+            return "pivot_market"
 
+    else:  #🦖 pivot_product cost <= pivot_market cost
+        if profit_obs > high_profit_b:
+            return "scale"
+        elif low_profit_b <= profit_obs <= high_profit_b:
+            return "pivot_market"
+        else: 
+            return "pivot_product"
 
-def mu2profit(mu_b, mu_c, product, market):
+
+def mu2profit(mu_p, mu_m, product, market):
     """Return the profit based on the given parameters."""
-    return pow(-1, e2p[product]+1) * mu_b/2 + pow(-1, e2m[market]+1) * mu_c/2
+    return pow(-1, e2p[product]+1) * mu_p/2 + pow(-1, e2m[market]+1) * mu_m/2
 
 
-def predict_observe_update_as_lbs(e, em):
+def predict_observe_update_as_lbs(t, em):
     """Predict, observe signal in chosen product-market, update mus, predict and observe profit, and decide action.
-       LATENT BIT STATE (LBS): mu_b_b, mu_c_b
+       externally, TIME = 1 but e = 0
+       LATENT BIT STATE (LBS): mu_p_b, mu_m_b
        BIT STATE (BS): profit_b (predicted profit), low_profit_b, high_profit_b
        ATOM STATE (AS): product, market
     """
     # E step
-    # PREDICT BAL: B[e]|A[e], L[e]
+    # PREDICT BAL: B[t]|A[t], L[t]
     for p in em.coords['PD'].values:
         for m in em.coords['MK'].values:
-            em['profit_b'].loc[dict(PD=p, MK=m, ACT_PRED=e)] = mu2profit(em['mu_b_b'][e], em['mu_c_b'][e], p, m)
+            em['profit_b'].loc[dict(PD=p, MK=m, ACT_PRED=t)] = mu2profit(em['mu_p_b'][t], em['mu_m_b'][t], p, m)
     
-    em['low_profit_b'][e] = em['profit_b'][e2p[em['product'][e].item()], e2m[em['market'][e].item()], e].item() - em['k_sigma'].item() * em['sigma_profit'][e].item()
-    em['high_profit_b'][e] = em['profit_b'][e2p[em['product'][e].item()], e2m[em['market'][e].item()], e].item() + em['k_sigma'].item() * em['sigma_profit'][e].item()
+    em['low_profit_b'][t] = em['profit_b'][e2p[em['product'][t].item()], e2m[em['market'][t].item()], t].item() - em['k_sigma'].item() * em['sigma_profit'][t].item()
+    em['high_profit_b'][t] = em['profit_b'][e2p[em['product'][t].item()], e2m[em['market'][t].item()], t].item() + em['k_sigma'].item() * em['sigma_profit'][t].item()
     
-    # OBSERVE CA: C[A[e]]|A[e]
-    em['profit_obs'][e] = sample_profit_obs(e, em) 
-    em['action'][e] = decide_action(em['profit_obs'][e], em['low_profit_b'][e], em['high_profit_b'][e])
+    # OBSERVE CA: C[A[t]]|A[t]
+    em['profit_obs'][t] = sample_profit_obs(t, em) 
+    em['action'][t] = decide_action(em['profit_obs'][t], em['low_profit_b'][t], em['high_profit_b'][t], em['CR'][0])
     
     # M step
-    if em['action'][e] == "scale":
+    if em['action'][t] == "scale":
         return em
     
-    elif e < em.dims['ACT_PRED']:     
-        pivot_product_model = cmdstanpy.CmdStanModel(stan_file='stan/pivot_product.stan')
-        pivot_market_model = cmdstanpy.CmdStanModel(stan_file='stan/pivot_market.stan')
+    pivot_product_model = cmdstanpy.CmdStanModel(stan_file='stan/pivot_product.stan')
+    pivot_market_model = cmdstanpy.CmdStanModel(stan_file='stan/pivot_market.stan')
+    
+    if em['action'][t] == "pivot_product":
+        # UPDATE A ABC: A[t+1]| A[t], B[t], C[A[t]]            
+        # if t < em.dims['ACT_PRED']-1:
+        em['product'][t+1] = 'man' if em['product'][t].item() == 'ai' else 'ai'
+        em['market'][t+1] = em['market'][t].item()
         
-        if em['action'][e] == "pivot_product":
-            # UPDATE A ABC: A[e+1]| A[e], B[e], C[A[e]]            
-            if e < em.dims['ACT_PRED']-1:
-                em['product'][e+1] = 'man' if em['product'][e].item() == 'ai' else 'ai'
-                em['market'][e+1] = em['market'][e].item()
-            
-            # UPDATE L LBC: L[e+1]| L[e], B[e], C[A[e]] (store posterior, set next stage prior)
-            data = {
-                'product': e2p[em.product[e].item()] + 1,
-                'market': e2m[em.market[e].item()] + 1,
-                'mu_b_b_mean': em.mu_b_b[e].item(),
-                'mu_c_b': em.mu_c_b[e].item(),
-                'profit_obs': em['profit_obs'][e],
-            }
-            fit = pivot_product_model.sample(data=data)
-            
-            em['mu_b_b_post'][e] = fit.stan_variable('mu_b_b')
-            em['mu_c_b_post'][e] = em['mu_c_b_post'][e-1] if e > 0 else em['mu_c_b'][e]
-            
-            em['mu_b_b'][e+1] = em['mu_b_b_post'][e].mean()
-            em['mu_c_b'][e+1] = em['mu_c_b'][e]         
-
-        elif em['action'][e] == "pivot_market":
-            # UPDATE A ABC: A[e+1]| A[e], B[e], C[A[e]]
-            if e < em.dims['ACT_PRED']-1:
-                em['market'][e+1] = 'b2b' if em['market'][e].item() == 'b2c' else 'b2c'
-                em['product'][e+1] = em['product'][e].item()
-            
-            # UPDATE L LBC: L[e+1]| L[e], B[e], C[A[e]] (store posterior, set next stage prior)
-            data = {
-                'product': e2p[em.product[e].item()] + 1,
-                'market': e2m[em.market[e].item()] + 1,
-                'mu_c_b_mean': em.mu_c_b[e].item(),
-                'mu_b_b': em.mu_b_b[e].item(),
-                'profit_obs': em['profit_obs'][e],
-            }
-            fit = pivot_market_model.sample(data=data)
-
-            em['mu_c_b_post'][e] = fit.stan_variable('mu_c_b')
-            em['mu_b_b_post'][e] = em['mu_b_b_post'][e-1] if e > 0 else em['mu_b_b'][e]
-            
-            em['mu_b_b'][e+1] = em['mu_b_b'][e]
-            em['mu_c_b'][e+1] = em['mu_c_b_post'][e].mean()          
+        # UPDATE L LBC: L[t+1]| L[t], B[t], C[A[t]] (store posterior, set next stage prior)
+        data = {
+            'product': e2p[em.product[t].item()] + 1,
+            'market': e2m[em.market[t].item()] + 1,
+            'mu_p_b_mean': em.mu_p_b[t].item(),
+            'mu_m_b': em.mu_m_b[t].item(),
+            'profit_obs': em['profit_obs'][t],
+        }
+        fit = pivot_product_model.sample(data=data)
         
-        # use A[e+1] reparameterization structure for L[e]
-        em['profit_post'][e] = mu2profit(em['mu_b_b_post'][e], em['mu_c_b_post'][e], em['product'][e].item(), em['market'][e].item())
-        if e == 0:
-            em['profit_prior'][e] = np.random.normal(em['profit_b'].loc[dict(PD=em.product[e].item(), MK=em.market[e].item(), ACT_PRED=e)] , em['sigma_profit'][e], 4000)
-        if e < em.dims['ACT_PRED']-1:
-            em['profit_prior'][e+1] = mu2profit(em['mu_b_b_post'][e], em['mu_c_b_post'][e], em['product'][e+1].item(), em['market'][e+1].item())
-            em['sigma_profit'][e+1] = em['profit_prior'][e+1].std()
-    return em
+        em['mu_p_b_prior'][t+1] = fit.stan_variable('mu_p_b') #posterior
+        em['mu_m_b_prior'][t+1] = em['mu_m_b_prior'][t]
+
+        em['mu_p_b'][t+1] = em['mu_p_b_prior'][t+1].mean()
+        em['mu_m_b'][t+1] = em['mu_m_b'][t]    
+
+        em['sigma_mu_p'][t+1] = em['mu_p_b_prior'][t+1].std()     
+        em['sigma_mu_m'][t+1] = em['sigma_mu_m'][t]
 
+    elif em['action'][t] == "pivot_market":
+        # UPDATE A ABC: A[t+1]| A[t], B[t], C[A[t]]
+        # if t < em.dims['ACT_PRED']-1:
+        em['market'][t+1] = 'b2b' if em['market'][t].item() == 'b2c' else 'b2c'
+        em['product'][t+1] = em['product'][t].item()
+        
+        # UPDATE L LBC: L[t+1]| L[t], B[t], C[A[t]] (store posterior, set next stage prior)
+        data = {
+            'product': e2p[em.product[t].item()] + 1,
+            'market': e2m[em.market[t].item()] + 1,
+            'mu_m_b_mean': em.mu_m_b[t].item(),
+            'mu_p_b': em.mu_p_b[t].item(),
+            'profit_obs': em['profit_obs'][t],
+        }
+        fit = pivot_market_model.sample(data=data)
+
+        em['mu_m_b_prior'][t+1] = fit.stan_variable('mu_m_b') #posterior
+        em['mu_p_b_prior'][t+1] = em['mu_p_b_prior'][t]
+
+        em['mu_m_b'][t+1] = em['mu_m_b_prior'][t+1].mean()
+        em['mu_p_b'][t+1] = em['mu_p_b'][t]
+
+        em['sigma_mu_p'][t+1] = em['sigma_mu_p'][t]
+        em['sigma_mu_m'][t+1] = em['mu_m_b_prior'][t+1].std()
+
+    # use A[t+1] reparameterization structure for L[t]
+    em['profit_post'][t] = mu2profit(em['mu_m_b'][t+1], em['mu_p_b'][t+1], em['product'][t].item(), em['market'][t].item())
+    
+    em['profit_prior'][t+1] = mu2profit(em['mu_p_b_prior'][t+1], em['mu_m_b_prior'][t+1], em['product'][t+1].item(), em['market'][t+1].item())
+    em['sigma_profit'][t+1] = em['profit_prior'][t+1].std() # CHECKED em['sigma_profit'][t+1]**2 (.67) ~  (em['sigma_mu_p'][t+1]**2 + em['sigma_mu_m'][t+1]**2)/4 (.69)
+    return em
 
 
-def experiment(mu_b_d, mu_c_d, mu_b_r, mu_c_r, sigma_profit, k, T, product='man', market='b2c'):
+def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='b2c'):
     """
     Record the expected reward from the experiment given initial parameters.
-    mu_b_d = mu_b_b - mu_b_r  # belief and goal differ but we treat both as _b
-    mu_c_d = mu_c_b - mu_c_r  
+    mu_p_d = (mu_sum * mu_p2m)/ (mu_p2m + 1)
+    mu_m_d = mu_sum/ (mu_p2m + 1)
+
+    mu_p_d = mu_p_r - mu_p_b (=0) = mu_p_r # belief and goal differ but we treat both as _b
+    mu_m_d = mu_m_r - mu_m_b (=0) = mu_m_r
     """
-    coords = {'HP': np.arange(1), 'P': np.arange(T+1), 'ACT_PRED': np.arange(T), 'ACT_PVT': np.arange(T), 'PD': ["man", "ai"], 'MK': ["b2c", "b2b"], 'M': np.arange(4000)}
-    em_name = f"bB{mu_b_d}_cC{mu_c_d}_B{mu_b_r}_C{mu_c_r}_s{sigma_profit}_k{k}_T{T}_{product}_{market}"
+    coords = {'H': np.arange(1), 'B': np.arange(T+1), 'ACT_PRED': np.arange(T), 'ACT_PVT': np.arange(T), 'PD': ["man", "ai"], 'MK': ["b2c", "b2b"], 'M': np.arange(4000)}
+    em_name = f"p2m-ratio{mu_p2m}_sum{mu_sum}_sigma{sigma_profit}_k-sigma{k_sigma}_exp{T}_{product}_{market}"
     file_path = f"data/experiment/{em_name}.nc"
 
     if os.path.exists(file_path):
@@ -132,65 +147,75 @@ def experiment(mu_b_d, mu_c_d, mu_b_r, mu_c_r, sigma_profit, k, T, product='man'
         return em
 
     data_vars = {
-        'mu_b_d': ('P', np.zeros(T+1)), 
-        'mu_c_d': ('P', np.zeros(T+1)),
-        'mu_b_b': ('P', np.zeros(T+1)),
-        'mu_c_b': ('P', np.zeros(T+1)),
-        'sigma_profit': ('P', np.zeros(T+1)),
-        
+        'mu_p_d': ('B', np.zeros(T+1)), 
+        'mu_m_d': ('B', np.zeros(T+1)),
+        'mu_p_b': ('B', np.zeros(T+1)),
+        'mu_m_b': ('B', np.zeros(T+1)),
+        'sigma_profit': ('B', np.zeros(T+1)),
+        'sigma_mu_p':('B', np.zeros(T+1)), # belief before1, ..., beforeT (=T), predicted
+        'sigma_mu_m':('B', np.zeros(T+1)),
+
         # PREDICT
         'market': ('ACT_PRED', np.full(T, '', dtype='object')), #entrant randomly choose (same Eprofit currently (belief comes from future))
         'product': ('ACT_PRED', np.full(T, '', dtype='object')),
+
         'profit_b': (('PD', 'MK', 'ACT_PRED'), np.zeros((2, 2, T))),
-        
         'low_profit_b': ('ACT_PRED', np.zeros(T)),
         'high_profit_b': ('ACT_PRED', np.zeros(T)),
+
         # OBSERVE
         'profit_obs': ('ACT_PRED', np.zeros(T)),
+
         # UPDATED BIT STATE
         'profit_prior': (('ACT_PRED', 'M'), np.zeros((T,4000))),
         'profit_post': (('ACT_PRED', 'M'), np.zeros((T,4000))),
-        'mu_b_b_post': (('ACT_PRED', 'M'), np.zeros((T,4000))),
-        'mu_c_b_post': (('ACT_PRED', 'M'), np.zeros((T,4000))),
+
+        'mu_p_b_prior': (('B', 'M'), np.zeros((T+1,4000))), # updating belief is expensive - do only after observation
+        'mu_m_b_prior': (('B', 'M'), np.zeros((T+1,4000))),
+        # 'mu_p_b_post': (('ACT_PRED', 'M'), np.zeros((T,4000))), # one lag
+        # 'mu_m_b_post': (('ACT_PRED', 'M'), np.zeros((T,4000))),
+
         # UPDATE ATOM STATE
         'action': ('ACT_PVT', np.full(T, '', dtype='object')),
         
-        'sigma_obs': ('HP', np.zeros(1)),
-        'mu_b_r': ('HP', np.zeros(1)),
-        'mu_c_r': ('HP', np.zeros(1)),
-        'k_sigma': ('HP', np.zeros(1)),
+        'mu_p_r': ('H', np.zeros(1)),
+        'mu_m_r': ('H', np.zeros(1)), 
+        'k_sigma': ('H', np.zeros(1)),
+        'CR': ('H', np.zeros(1)),
         'em_name': ((), em_name),
     }
     em = xr.Dataset(data_vars=data_vars, coords=coords)
-    em['profit_r'] = (('PD', 'MK', 'HP'), np.zeros((len(em.coords['PD']), len(em.coords['MK']), 1)))
+    em['profit_r'] = (('PD', 'MK'), np.zeros((len(em.coords['PD']), len(em.coords['MK']))))
 
-    for t in range(em.dims['P']):
+    for t in range(em.dims['ACT_PRED']-1):
         if t == 0:
-            em['mu_b_d'][0] = mu_b_d
-            em['mu_c_d'][0] = mu_c_d
-            em['mu_b_b'][0] =  mu_b_r + mu_b_d
-            em['mu_c_b'][0] = mu_c_r + mu_c_d 
-            em['mu_b_r'][0] = mu_b_r
-            em['mu_c_r'][0] = mu_c_r
-            em['sigma_obs'][0] = 1
+            em['mu_p_r'][0] = (mu_sum * mu_p2m)/ (mu_p2m + 1)
+            em['mu_m_r'][0] = mu_sum/ (mu_p2m + 1)
+            em['mu_p_b'][0] = 0 
+            em['mu_m_b'][0] = 0 
+            em['mu_p_d'][0] = em['mu_p_r'][0]
+            em['mu_m_d'][0] = em['mu_m_r'][0]
+            em['CR'][0] = .5 #cost_pivot_product / cost_pivot_market; 🐣CR=2, 🦖CR=.5
+
+            em['sigma_mu_p'][0] = sigma_profit * np.sqrt(2)
+            em['sigma_mu_m'][0] = sigma_profit * np.sqrt(2)
             em['sigma_profit'][0] = sigma_profit
+            
+            em['mu_p_b_prior'][0] = np.random.normal(0, em['sigma_mu_p'][0], 4000)
+            em['mu_m_b_prior'][0] = np.random.normal(0, em['sigma_mu_m'][0], 4000)
+            em['profit_prior'][0] = np.random.normal(0, em['sigma_profit'][0], 4000)
 
-            em['mu_b_b_post'][0] = em['mu_b_b'][0]
-            em['mu_c_b_post'][0] = em['mu_c_b'][0]
             for p in em.coords['PD'].values:           
                 for m in em.coords['MK'].values:
-                    em['profit_r'].loc[dict(PD=p, MK=m)] = mu2profit(em['mu_b_r'].item(), em['mu_c_r'].item(), p, m)
-
-        elif t == 1:
+                    em['profit_r'].loc[dict(PD=p, MK=m)] = mu2profit(em['mu_p_r'].item(), em['mu_m_r'].item(), p, m)
+            
             em['product'][0] = product
             em['market'][0] = market
+            em['k_sigma'][0] = k_sigma
 
-            em['k_sigma'][0] = k
-            em = predict_observe_update_as_lbs(t-1, em) #time 1 is 0th experiment
-        else:
-            em = predict_observe_update_as_lbs(t-1, em)
+        em = predict_observe_update_as_lbs(t, em)
 
-        if em['action'][t-1].item() == "scale":
+        if em['action'][t].item() == "scale":
             return em
 
     em.to_netcdf(f"data/experiment/{em.em_name.values}.nc")
@@ -198,4 +223,4 @@ def experiment(mu_b_d, mu_c_d, mu_b_r, mu_c_r, sigma_profit, k, T, product='man'
 
 if __name__ == "__main__":     
     # Start the experiment
-    em = experiment(mu_b_d= -3, mu_c_d= -2, mu_b_r=3, mu_c_r=1, T=2)
\ No newline at end of file
+    em = experiment(mu_p2m = 3, mu_sum = 4, sigma_profit = 1, k_sigma=2,  T=2)
\ No newline at end of file
diff --git a/simulation/interact_tool.py b/simulation/interact_tool.py
index 80dedcf..94d359f 100644
--- a/simulation/interact_tool.py
+++ b/simulation/interact_tool.py
@@ -11,6 +11,7 @@
 from ipywidgets import IntSlider, FloatSlider, VBox, HBox, HTML, Output
 from IPython.display import display
 import matplotlib.colors as mcolors
+from mpl_toolkits.mplot3d import Axes3D
 
 global markets, products, e2p, e2m
 markets = ["b2c", "b2b"]
@@ -28,48 +29,39 @@ def plot_layer0_belief(em):
     - em: xarray dataset containing the required variables
     """
     num_exp = np.where(em['action'].values == "scale")[0][0] + 1 if "scale" in em['action'].values else em['action'].shape[0]
-    experiments = np.arange(1, num_exp + 1)
-
-    # Extracting belief values from the em dataset
-    mu_b_b = em['mu_b_b'].values[:num_exp]
-    mu_c_b = em['mu_c_b'].values[:num_exp]
-    
-    # Extracting ground truth values from the em dataset
-    mu_b_r = em['mu_b_r'].values[0]
-    mu_c_r = em['mu_c_r'].values[0]
+    experiments = np.arange(1, num_exp+1)
     
-    # Extracting posterior samples from the em dataset
-    mu_b_b_post = em['mu_b_b_post'].values[:num_exp]
-    mu_c_b_post = em['mu_c_b_post'].values[:num_exp]
+    mu_p_b = em['mu_p_b'].values[:num_exp]
+    sigma_mu_p = em['sigma_mu_p'].values[:num_exp]
+    mu_m_b = em['mu_m_b'].values[:num_exp]
+    sigma_mu_m = em['sigma_mu_m'].values[:num_exp]
 
-    # Create subplots to put the plots side by side
     fig, axs = plt.subplots(1, 2, figsize=(32, 8), sharey=True)
+    
     fig.suptitle(em['em_name'].item())
 
-    # Plotting the parameter updates with ground truth on separate subplots
-    az.plot_hdi(experiments, mu_b_b_post.T, hdi_prob=0.94, ax=axs[0], color='green', fill_kwargs={'alpha': 0.2})
-    axs[0].plot(experiments, mu_b_b, marker='>',linestyle='--',  label='Updated $\mu_{b}$', color='green')
-    axs[0].axhline(y=mu_b_r, color='green', linestyle='-', linewidth=5, label='Ground Truth $\mu_{b}$')
-    axs[0].set_title('Belief in $\mu_{b}$ by Time')
+    axs[0].fill_between(experiments, mu_p_b - 2 * sigma_mu_p,  mu_p_b + 2 * sigma_mu_p, color='green', alpha=0.2, label='$\pm2\sigma$')
+    axs[0].plot(experiments, mu_p_b, marker='>',linestyle='--',  label='Updated $\mu_{product}$', color='green')
+    axs[0].axhline(y=em['mu_p_r'].values[0], color='green', linestyle='-', linewidth=5, label='Ground Truth $\mu_{product}$')
+    axs[0].set_title('Belief in $\mu_{product}$ by Time')
     axs[0].set_xlabel('Experiment')
     axs[0].set_xticklabels([str(int(exp)) for exp in experiments])
     axs[0].set_xticks(experiments)
-    axs[0].set_ylabel('Belief on $\mu_{b}$')
+    axs[0].set_ylabel('Belief on $\mu_{product}$')
     axs[0].legend(loc='lower left')
     axs[0].grid(True)
 
-    az.plot_hdi(experiments, mu_c_b_post.T, hdi_prob=0.94, ax=axs[1], color='purple', fill_kwargs={'alpha': 0.2})
-    axs[1].plot(experiments, mu_c_b, marker='>',linestyle='--', label='Updated $\mu_{c}$', color='purple')
-    axs[1].axhline(y=mu_c_r, color='purple', linestyle='-', linewidth=5, label='Ground Truth $\mu_{c}$')
-    axs[1].set_title('Belief in $\mu_{c}$ by Time')
+    axs[1].fill_between(experiments, mu_m_b - 2 * sigma_mu_m,  mu_m_b + 2 * sigma_mu_p, color='purple', alpha=0.2, label='$\pm2\sigma$')
+    axs[1].plot(experiments, mu_m_b, marker='>',linestyle='--', label='Updated $\mu_{market}$', color='purple')
+    axs[1].axhline(y=em['mu_m_r'].values[0], color='purple', linestyle='-', linewidth=5, label='Ground Truth $\mu_{market}$')
+    axs[1].set_title('Belief in $\mu_{market}$ by Time')
     axs[1].set_xlabel('Experiment')
     axs[1].set_xticks(experiments)
     axs[1].set_xticklabels([str(int(exp)) for exp in experiments])
-    axs[1].set_ylabel('Belief on $\mu_{c}$')
+    axs[1].set_ylabel('Belief on $\mu_{market}$')
     axs[1].legend(loc='lower left')
     axs[1].grid(True)
 
-
     plt.tight_layout()
     figure_title = em['em_name'].item() + "_L0.png"
     figure_path = os.path.join("data/figure/em/l0", figure_title)
@@ -78,154 +70,127 @@ def plot_layer0_belief(em):
     plt.close()
 
 def plot_layer1_profit(em):
-    P = em.dims['P']
-    ACT_PRED = em.dims['ACT_PRED']
-    ACT_PVT = em.dims['ACT_PVT']
-    ACT_PVT_colors = {'scale': 'red', 'pivot_market': 'purple', 'pivot_product': 'green'}
-    
-    num_exp = np.where(em['action'].values == "scale")[0][0] + 1 if "scale" in em['action'].values else em['action'].shape[0]
-    experiments = np.arange(1, num_exp+1)
-    fig = plt.figure(figsize=(4 * num_exp, 9))
-    fig.suptitle(em['em_name'].item())
-    gs = fig.add_gridspec(3, num_exp * 2, height_ratios=[1, 1, 1])  # Making the grid spec with more columns
-
-    profit_prior = em['profit_prior'][:num_exp, :4]  # Adjusting shape to be (num_exp, 4)
-    low_profit_b = em['low_profit_b'].values[:num_exp]
-    high_profit_b = em['high_profit_b'].values[:num_exp]
-
-    profit_obs = em['profit_obs'].values[:num_exp]
-    actions = em['action'].values[:num_exp]
+    cell_colors = {'ai-b2c': 'green', 'man-b2c': 'gray', 'man-b2b': 'purple', 'ai-b2b': 'orange'}
+    ACT_PVT_markers = {'pivot_product': 'B', 'pivot_market': 'M', 'scale': 'S'}
 
     # Create subplots
-    axs = np.empty((3, num_exp), dtype=object)
-    axs[0, 0] = fig.add_subplot(gs[0, :])  # Span the first row across all columns
-    for col in range(num_exp):
-        axs[1, col] = fig.add_subplot(gs[1, col * 2:(col + 1) * 2])  # Second row
-        axs[2, col] = fig.add_subplot(gs[2, col * 2:(col + 1) * 2], projection='3d')  # Third row
-                
-    # Initial plot for profit feedback with action dots in the first row
-    predicted_profits = [em['profit_b'][e2p[em['product'][0].item()], e2m[em['market'][0].item()], 0]]
-    for t in range(1, num_exp):
-        predicted_profits.append(em['profit_b'][e2p[em['product'][t].item()], e2m[em['market'][t].item()], t])                                                        
+    num_exp = np.where(em['action'].values == "scale")[0][0] + 1 if "scale" in em['action'].values else em['action'].shape[0]
     
-    axs[0, 0].scatter(np.array(range(1, num_exp + 1)), list(profit_obs), color=mcolors.to_rgba('purple', alpha=0.8), label='observed', marker='o')
-    axs[0, 0].plot(list(range(1, num_exp + 1)), predicted_profits, color='blue', label='expected', linestyle='--')
-    axs[0, 0].fill_between(list(range(1, num_exp + 1)), list(low_profit_b), list(high_profit_b), color='skyblue', alpha=0.3, label='low-high bar')
+    fig = plt.figure(figsize=(10 * num_exp, 15), dpi=300)
+    fig.suptitle(em['em_name'].item())
+    gs = fig.add_gridspec(2, num_exp * 2, height_ratios=[1, 1]) # Making the grid spec with more columns
+    axs = np.empty((2, num_exp), dtype=object)
     
-    ACT_PVT_markers = {'pivot_product': 'P', 'pivot_market': 'M', 'scale': 'S'}
-    for i in range(ACT_PVT):
-        markers = [ACT_PVT_markers.get(action, 'o') for action in actions]
-        for j, marker in enumerate(markers):
-            axs[0, 0].scatter(j + 1.2, -0.25, color='red', s=50, marker=f'${marker}$')
+    for col in range(num_exp):
+        axs[0, col] = fig.add_subplot(gs[0, col * 2:(col + 1) * 2]) # Second row
+        axs[1, col] = fig.add_subplot(gs[1, col * 2:(col + 1) * 2], projection='3d')
 
-    axs[0, 0].set_title('Expected Profit')
-    axs[0, 0].set_ylim(-3, 3)
-    axs[0, 0].legend(loc='upper left', prop={'size': 4})
-    axs[0, 0].set_xlabel('Experiment')
-    axs[0, 0].set_xticks(experiments)
+    fig.suptitle(em['em_name'].item())
 
-    # Plot posterior distributions in the second row
-    for t in experiments:
-        profit_b_prior = em['profit_prior'][t-1].values
-        profit_b_posterior = em['profit_post'][t-1].values
-        
-        axs[1, t-1].hist(profit_b_prior, bins=30, alpha=0.5, color='skyblue', edgecolor='white')
-        axs[1, t-1].hist(profit_b_posterior, bins=30, alpha=0.3, color='#7B68EE', edgecolor='white')
-        axs[1, t-1].axvline(x=em['profit_obs'][t-1].item(), color=mcolors.to_rgba('purple', alpha=0.8), linestyle='-', label='observed')
+    low_profit_b = em['low_profit_b'].values[:num_exp]
+    high_profit_b = em['high_profit_b'].values[:num_exp]
+
+    for t in range(num_exp):
+        p = em['product'][t].item()
+        m = em['market'][t].item()
+        pm_idx = f'{p}-{m}'
+        act_pvt = em['action'][t].item()
+
+        # Posterior distributions
+        profit_b_prior = em['profit_prior'][t].values
+        profit_b_posterior = em['profit_post'][t].values
         
-        axs[1, t-1].axvline(x=profit_b_prior.mean(), color='blue', linestyle='--', label='prior mean') # = em['profit_b'][e2p[em['product'][t-1].item()], e2m[em['market'][t-1].item()], t-1].item()
-        axs[1, t-1].axvline(x=profit_b_posterior.mean(), color='#7B68EE', linestyle='--', label='posterior mean')
+        axs[0, t].hist(profit_b_prior, bins=30, alpha=0.1, color=cell_colors[pm_idx], edgecolor='white')
+        axs[0, t].hist(profit_b_posterior, bins=30, alpha=.2, color=cell_colors[pm_idx], edgecolor='white')
+        axs[0, t].axvline(x=em['profit_obs'][t].item(), color=cell_colors[pm_idx], linestyle='-', label='observed profit')
+        axs[0, t].axvline(x=profit_b_prior.mean(), color=cell_colors[pm_idx], linestyle='--', label=f'predicted in {p}-{m}')
+        axs[0, t].axvline(x=low_profit_b[t], color=cell_colors[pm_idx], linestyle='-.', label='Low profit bar')
+        axs[0, t].axvline(x=high_profit_b[t], color=cell_colors[pm_idx], linestyle='-.', label='High profit bar')
         
-        axs[1, t-1].set_title(f'Time {t} Profit Experiment')
-        axs[1, t-1].legend()
-        axs[1, t-1].set_xlim(-3, 3)
-        axs[1, t-1].set_ylim(0, 500)
+        axs[0, t].set_title(f'EXPERIMENT {t+1}:\n ENV {pm_idx}, ACTION {act_pvt}->')
+        axs[0, t].legend(prop={'size':15})
+        axs[0, t].set_xlim(-3, 3)
+        axs[0, t].set_ylim(0, 500)
 
-    # Define colors for each cell
-    colors = ['gray', 'green', 'purple', 'orange']
     labels = ['man-b2c', 'ai-b2c', 'man-b2b', 'ai-b2b']
-    
-    for t in range(num_exp):
-        ax = axs[2, t]
-        # Create 2x2 grids for each experiment
-        x_positions = np.array([1, 0, 1, 0])
-        y_positions = np.array([0, 0, 1, 1])
-        dx = dy = 0.1  # Width and depth of the bars
-        z_positions_b = profit_prior[t, :]
-        z_positions_t = em['profit_r'].values.flatten()
-
-        # Plot bars for beliefs
-        ax.bar3d(x_positions, y_positions, np.zeros_like(x_positions), dx, dy, z_positions_b, color=colors, alpha=0.8, edgecolor='black')
-
-        # Plot bars for truth
-        ax.bar3d(x_positions, y_positions, np.zeros_like(x_positions), dx, dy, z_positions_t, color=colors, alpha=0.2, edgecolor='black')
-
-        # Add text annotations for beliefs
-        for i in range(4):
-            ax.text(x_positions[i], y_positions[i], z_positions_b[i], f'{z_positions_b[i]:.2f}', color='black', ha='right', va='bottom', fontsize=10)
+    cell_colors = {'man-b2c': 'gray', 'ai-b2c': 'green', 'man-b2b': 'purple', 'ai-b2b': 'orange'}  # updated colors
 
-        # Add text annotations for truth
-        for i in range(4):
-            ax.text(x_positions[i], y_positions[i], z_positions_t[i], f'{z_positions_t[i]:.2f}', color='black', ha='right', va='top', fontsize=10)
-
-        # Remove gray grid and set background to white
+    for t in range(num_exp):
+        ax = axs[1, t]
+        pd_mk_combinations = [(pd, mk) for pd in em.coords['PD'].values for mk in em.coords['MK'].values]
+        x_positions = np.array([0, 0, 1, 1])  # Example layout
+        y_positions = np.array([0, 1, 0, 1])
+
+        # Assuming em['profit_b'] and em['profit_r'] are structured to allow indexing by ACT_PRED, PD, and MK
+        for idx, (pd, mk) in enumerate(pd_mk_combinations):
+            z_position_b = em['profit_b'].loc[dict(PD=pd, MK=mk, ACT_PRED=t)]
+            z_position_t = em['profit_r'].loc[dict(PD=pd, MK=mk)]
+
+            # Determine the color based on PD and MK
+            label_index = f'{pd}-{mk}'
+            color = cell_colors[label_index]
+
+            ax.quiver(x_positions[idx], y_positions[idx], z_position_b, 
+                    0, 0, z_position_t - z_position_b, 
+                    arrow_length_ratio=0.1, color=color)
+            ax.scatter(x_positions[idx], y_positions[idx], z_position_b, color='black', s=50, facecolors='none')
+            ax.scatter(x_positions[idx], y_positions[idx], z_position_t, color=color, s=50)
+            ax.text(x_positions[idx], y_positions[idx], z_position_b, f'{z_position_b:.2f}', color='black', ha='left', va='bottom', fontsize=10)
+            ax.text(x_positions[idx], y_positions[idx], z_position_t, f'{z_position_t:.2f}', color='black', ha='right', va='top', fontsize=10)
+
+        colors = np.array([['gray','purple'], ['green', 'orange']]) 
+        x = np.linspace(0, 1, 3)
+        y = np.linspace(0, 1, 3)
+        X, Y = np.meshgrid(x, y)
+        Z = np.zeros_like(X)  # Plane at z=0
+        # Plot each quadrant with different color
+        for i in range(2):
+            for j in range(2):
+                ax.plot_surface(X[i:i+2, j:j+2], Y[i:i+2, j:j+2], Z[i:i+2, j:j+2],
+                                color=colors[i][j], alpha=.3, rstride=1, cstride=1)
         ax.xaxis.pane.fill = False
         ax.yaxis.pane.fill = False
         ax.zaxis.pane.fill = False
         ax.set_facecolor('white')
-
-        # Remove grid lines
         ax.grid(False)
-
         ax.set_xticks([])
         ax.set_yticks([])
         ax.set_zticks(np.arange(-2, 3, 1))
         ax.set_xticklabels([])
         ax.set_yticklabels([])
         ax.set_zticklabels(['', '', '', '', ''])
-        
-        ax.set_title(f'exp{t + 1}', fontsize=10, pad=20)
-
-        # Add custom labels
-        for i, (x, y) in enumerate(zip(x_positions, y_positions)):
-            ax.text(x, y, max(z_positions_b[i], z_positions_t[i]) + 0.3, f'{labels[i]}', color=colors[i], ha='center', va='bottom', fontsize=10)
+        ax.axis('off')
 
+    # Save the figure
     plt.subplots_adjust(wspace=0.5, hspace=0.3)
     figure_title = em['em_name'].item() + "_L1.png"
     figure_path = os.path.join("data/figure/em/l1", figure_title)
     plt.savefig(figure_path)
-    plt.show()
     plt.close()
 
+
 from ipywidgets import FloatSlider, IntSlider, Output
 import matplotlib.pyplot as plt
 
 def interact_tool():
     # Define sliders for the parameters mu_sum and mu_diff
-    mu_diff_slider = FloatSlider(min=-2, max=2, step=1, value=0, continuous_update=False, description="mu_diff")
+    mu_p2m_slider = FloatSlider(min=.2, max=2, step=.2, value=.2, continuous_update=False, description="mu_p2m")
     mu_sum_slider = FloatSlider(min=-4, max=0, step=1, value=-2, continuous_update=False, description="mu_sum")
-    sigma_slider = IntSlider(min=1, max=4, step=1, value=1, continuous_update=False, description="sigma")
-    k_slider = IntSlider(min=1, max=4, step=1, value=1, continuous_update=False, description="k")
+    sigma_slider = IntSlider(min=1, max=4, step=1, value=1, continuous_update=False, description="observation uc")
+    k_slider = IntSlider(min=1, max=4, step=1, value=1, continuous_update=False, description="decision uc")
     t_slider = IntSlider(min=2, max=4, step=1, value=2, continuous_update=False, description="experiment#")
     output = Output()
 
-    def func(mu_b_d, mu_c_d, mu_b_r, mu_c_r, sigma_profit, k, T, product='man', market='b2c'):
-        em = experiment(mu_b_d, mu_c_d, mu_b_r, mu_c_r, sigma_profit, T=T, k=k, product=product, market=market)
+    def func(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='b2c'):
+        em = experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T=T, product=product, market=market)
         fig0 = plot_layer0_belief(em)
         fig1 = plot_layer1_profit(em)
         return fig0, fig1
 
     def on_value_change(change):
-        mu_sum = mu_sum_slider.value
-        mu_diff = mu_diff_slider.value
-        mu_b_d = (mu_sum + mu_diff) / 2
-        mu_c_d = (mu_sum - mu_diff) / 2
-
         fig0, fig1 = func(
-            mu_b_d=mu_b_d,
-            mu_c_d=mu_c_d,
-            mu_b_r=3,
-            mu_c_r=1,
+            mu_p2m_slider.value, 
+            mu_sum_slider.value,
             sigma_profit=sigma_slider.value,
             k=k_slider.value,
             T=t_slider.value  # Assuming T is a constant or you can add a slider for T if needed
@@ -236,14 +201,14 @@ def on_value_change(change):
             plt.show(fig1)
 
     # Link the sliders to the on_value_change function
-    for slider in [mu_diff_slider, mu_sum_slider, sigma_slider, k_slider, t_slider]:
+    for slider in [mu_p2m_slider, mu_sum_slider, sigma_slider, k_slider, t_slider]:
         slider.observe(on_value_change, names='value')
     
     # Arrange sliders and output in a VBox layout
     interactive_simulation = VBox([
         HTML("<h1>Pivot Game</h1>"),
         HBox([
-            VBox([HTML("<b>Parameters</b>"), mu_diff_slider, mu_sum_slider, sigma_slider, k_slider,]),
+            VBox([HTML("<b>Parameters</b>"), mu_p2m_slider, mu_sum_slider, sigma_slider, k_slider,]),
         ]),
         output
     ])
@@ -251,10 +216,11 @@ def on_value_change(change):
     return interactive_simulation
 
 if __name__ == "__main__":
-    # func(mu_b_d= .2, mu_c_d= -.1, sigma_obs=.1, T=4, product='man', market='b2c')
+    # func(mu_p_d= .2, mu_m_d= -.1, sigma_obs=.1, T=4, product='man', market='b2c')
     # th = xr.open_dataset("data/th/bB[-0.2  0.   0.2]_cC[-0.2  0.   0.2]_a[0.4]_b0.2_c0.1_s0.1_cash4_E5_man_b2c")
     # plot_th_given_experiment(th)
-    em = experiment(mu_b_d= -3, mu_c_d= -1, mu_b_r=3, mu_c_r=1, sigma_profit=1, T=2, k=2, product = 'man', market = 'b2c')
+    
+    em = experiment(mu_p2m = 3, mu_sum = 4, sigma_profit=1, k_sigma=2, T=2, product = 'man', market = 'b2c')
     # em = xr.open_dataset("data/experiment/bB-0.2_cC-0.2_B0.3_C0.3_a0.1_s0.1_T10_man_b2c.nc")
-    plot_layer0_belief(em)
+    # plot_layer0_belief(em)
     plot_layer1_profit(em)
\ No newline at end of file
diff --git a/simulation/test_hypo.py b/simulation/test_hypo.py
index 0e70b64..10adf7d 100644
--- a/simulation/test_hypo.py
+++ b/simulation/test_hypo.py
@@ -8,23 +8,24 @@
 from interact_tool import plot_layer0_belief, plot_layer1_profit
 from collections import Counter
 
-#th-A: higher mu_a -> faster scale
-# in:  mu_a / (mu_b_r + mu_c_r)
-# out: time to first scale
-
-#th-bBcC: higher mu_b_d - mu_c_d -> higher product pivot ratio
-# in: mu_b_d - mu_c_d
-# out: ratio of first pivot_product to first pivot_market
-
-#th-A|BC: scott's idea on higher mu_a -> (b, c) combination that exceeds lowbar
-# in: mu_a
-# TODO out: ratio of cells whose mu_b, mu_c exceed lowbar (b or r)??  
-
-# def brute_force(mu_diff_range=np.linspace(-2, -2, 1), mu_sum_range=np.linspace(-4, 0, 2), sigma_profit_range = np.linspace(1, 2, 10), T=2,  product='man', market='b2c'):
-def brute_force(mu_diff_range=np.linspace(-3, 3, 3), mu_sum_range=np.linspace(-3, 3, 3), sigma_profit_range = np.linspace(.5, 2, 2), k_range =np.linspace(.5, 2, 2), T=3,  product='man', market='b2c'):    
-    mu_b_r=3
-    mu_c_r=1
-    th_name = f"mu_diff{mu_diff_range[0]}to{mu_diff_range[0]}l{len(mu_diff_range)}_mu_sum{mu_sum_range[0]}to{mu_sum_range[-1]}l{len(mu_sum_range)}_B{mu_b_r}_C{mu_c_r}_s{sigma_profit_range[0]}{sigma_profit_range[-1]}_k{k_range[0]}to{k_range[-1]}l{len(sigma_profit_range)}_T{T}_{product}_{market}"
+# 1.1 ground truth diff (observed profitability boost) 
+# - fig1: increase as mu_pc- increase (action follows hope)
+
+# 1.2 belief on how noisy learning is
+# - fig2: viscous prior with belief on noisy environment (P1|sigma=.1 vs P1|sigma=1 vs P1|sigma=3)
+
+# 1.3 setting k
+# - fig3: (size of org -> cost ratio of pivot ->) 1/k -> 1/R
+
+# todo: express decision k as a function of (-1)^{cost_pivot_product < cost_pivot_market}
+# 1.4 ground truth sum (observed profitability boost) 
+# - fig4: increase as mu_pc+ increase (action speed follows 
+# fixing mu_p and increasing mu_m VS fixing mu_m and increasing mu_p
+# TODO out: ratio of cells whose mu_p, mu_m exceed lowbar (b or r)??  
+
+# def brute_force(mu_p2m_range=np.linspace(-2, -2, 1), mu_sum_range=np.linspace(-4, 0, 2), sigma_profit_range = np.linspace(1, 2, 10), T=2,  product='man', market='b2c'):
+def brute_force(mu_p2m_range=np.linspace(.3, 3, 5), mu_sum_range=np.linspace(1, 4, 5), sigma_profit_range = np.linspace(.5, 2, 3), k_sigma_range =np.linspace(.5, 2, 3), T=2,  product='man', market='b2c'):    
+    th_name = f"p2m{mu_p2m_range[0]}to{mu_p2m_range[-1]}l{len(mu_p2m_range)}_sum{mu_sum_range[0]}to{mu_sum_range[-1]}l{len(mu_sum_range)}_s{sigma_profit_range[0]}{sigma_profit_range[-1]}_k{k_sigma_range[0]}to{k_sigma_range[-1]}l{len(sigma_profit_range)}_T{T}_{product}_{market}"
     file_path = f"data/theory/{th_name}.nc"
     if os.path.exists(file_path):
         th = xr.open_dataset(file_path)
@@ -33,34 +34,33 @@ def brute_force(mu_diff_range=np.linspace(-3, 3, 3), mu_sum_range=np.linspace(-3
     
     th = xr.Dataset(
         coords={
-            'mu_diff': mu_diff_range,
+            'mu_p2m': mu_p2m_range,
             'mu_sum': mu_sum_range,
             'sigma_profit': sigma_profit_range,
-            'k': k_range,
+            'k': k_sigma_range,
         },
         data_vars={
-            'pivot_ratio': (('mu_diff', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_diff_range), len(mu_sum_range), len(sigma_profit_range), len(k_range)), np.nan)),
-            'reach_optimality': (('mu_diff', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_diff_range), len(mu_sum_range), len(sigma_profit_range), len(k_range)), np.nan)),
-            'time_to_reach_optimality': (('mu_diff', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_diff_range), len(mu_sum_range), len(sigma_profit_range), len(k_range)), np.nan)),
-            'act_seq': (('mu_diff', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_diff_range), len(mu_sum_range), len(sigma_profit_range), len(k_range)),  '', dtype='object')), 
+            'pivot_ratio': (('mu_p2m', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_p2m_range), len(mu_sum_range), len(sigma_profit_range), len(k_sigma_range)), np.nan)),
+            'reach_optimality': (('mu_p2m', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_p2m_range), len(mu_sum_range), len(sigma_profit_range), len(k_sigma_range)), np.nan)),
+            'time_to_reach_optimality': (('mu_p2m', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_p2m_range), len(mu_sum_range), len(sigma_profit_range), len(k_sigma_range)), np.nan)),
+            'act_seq': (('mu_p2m', 'mu_sum', 'sigma_profit', 'k'), np.full((len(mu_p2m_range), len(mu_sum_range), len(sigma_profit_range), len(k_sigma_range)),  '', dtype='object')), 
             'th_name': ((), th_name),
         }
     )
     
-    for mu_diff in mu_diff_range:
+    for mu_p2m in mu_p2m_range:
         for mu_sum in mu_sum_range:  
             for sigma_profit in sigma_profit_range:
-                for k in k_range:  
-                    mu_b_d = (mu_sum + mu_diff) / 2
-                    mu_c_d = (mu_sum - mu_diff) / 2
-                    em = experiment(mu_b_d, mu_c_d, mu_b_r, mu_c_r, sigma_profit, k, T, product, market)
+                for k_sigma in k_sigma_range:  
+
+                    em = experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product, market) # assuming init mu are 0, mu_p_d = mu_p_r
                     plot_layer0_belief(em)
                     plot_layer1_profit(em)
                     if em is not None:     
-                        th['pivot_ratio'].loc[dict(mu_diff=mu_diff, mu_sum=mu_sum, sigma_profit = sigma_profit, k=k)] = compute_pivot_ratio(em)
-                        th['reach_optimality'].loc[dict(mu_diff=mu_diff, mu_sum=mu_sum, sigma_profit = sigma_profit, k=k)] = compute_reach_optimality(em)
-                        th['time_to_reach_optimality'].loc[dict(mu_diff=mu_diff, mu_sum=mu_sum, sigma_profit = sigma_profit, k=k)] = compute_time_to_reach_optimality(em)
-                        th['act_seq'].loc[dict(mu_diff = mu_diff, mu_sum = mu_sum, sigma_profit = sigma_profit, k=k)] = compute_sequence(em)[0]
+                        th['pivot_ratio'].loc[dict(mu_p2m=mu_p2m, mu_sum=mu_sum, sigma_profit = sigma_profit, k=k_sigma)] = compute_pivot_ratio(em)
+                        th['reach_optimality'].loc[dict(mu_p2m=mu_p2m, mu_sum=mu_sum, sigma_profit = sigma_profit, k=k_sigma)] = compute_reach_optimality(em)
+                        th['time_to_reach_optimality'].loc[dict(mu_p2m=mu_p2m, mu_sum=mu_sum, sigma_profit = sigma_profit, k=k_sigma)] = compute_time_to_reach_optimality(em)
+                        th['act_seq'].loc[dict(mu_p2m = mu_p2m, mu_sum = mu_sum, sigma_profit = sigma_profit, k=k_sigma)] = compute_sequence(em)
     th.to_netcdf(f"data/theory/{th.th_name.values}.nc")
 
     plot_theory_given_experiment(th)  # Call the plotting function after running the experiments
@@ -72,16 +72,16 @@ def compute_pivot_ratio(em):
     pivot_market_count = 0
 
     for action in actions:
-        if action == 'scale' or action == 'fail':
+        if action == 'scale':
             break
         if action == 'pivot_product':
             pivot_product_count += 1
         if action == 'pivot_market':
             pivot_market_count += 1
 
-    if pivot_market_count == 0:
-        return em.dims['ACT_PRED'] * 2 # Avoid division by zero
-    return pivot_product_count / pivot_market_count
+    # if pivot_market_count == 0:
+    #     return em.dims['ACT_PRED'] * 2 # Avoid division by zero
+    return (pivot_product_count+1) / (pivot_market_count + 1)
 
 def compute_reach_optimality(em):
     products = em['product'].values
@@ -109,7 +109,7 @@ def compute_sequence(em, sequence_length=3):
     actions = [a.replace('pivot_market', 'm') for a in actions]
     actions = [a.replace('scale', 's') for a in actions]
     
-    action_sequences = [''.join(actions[i:i+sequence_length]) for i in range(len(actions) - sequence_length + 1)]
+    action_sequences = ''.join(actions) #[''.join(actions[i:i+sequence_length]) for i in range(len(actions) - sequence_length + 1)]
     # sequence_counts = Counter(action_sequences)
     
     # # Sort sequences by frequency
@@ -139,28 +139,28 @@ def plot_metrics(ax, x_values, pivot_ratio, reach_optimality, time_to_reach_opti
         ax.legend()
 
     # Extracting values
-    mu_diff = th['mu_diff'].values
+    mu_p2m = th['mu_p2m'].values
     mu_sum = th['mu_sum'].values
     sigma_profit = th['sigma_profit'].values
     k = th['k'].values
 
-    pivot_ratio_mu_diff = th['pivot_ratio'].mean(dim=['mu_sum', 'sigma_profit', 'k']).values
-    reach_optimality_mu_diff = th['reach_optimality'].mean(dim=['mu_sum', 'sigma_profit', 'k']).values
-    time_to_reach_optimality_mu_diff = th['time_to_reach_optimality'].mean(dim=['mu_sum', 'sigma_profit', 'k']).values
+    pivot_ratio_mu_p2m = th['pivot_ratio'].mean(dim=['mu_sum', 'sigma_profit', 'k']).values
+    reach_optimality_mu_p2m = th['reach_optimality'].mean(dim=['mu_sum', 'sigma_profit', 'k']).values
+    time_to_reach_optimality_mu_p2m = th['time_to_reach_optimality'].mean(dim=['mu_sum', 'sigma_profit', 'k']).values
 
-    pivot_ratio_mu_sum = th['pivot_ratio'].mean(dim=['mu_diff', 'sigma_profit', 'k']).values
-    reach_optimality_mu_sum = th['reach_optimality'].mean(dim=['mu_diff', 'sigma_profit', 'k']).values
-    time_to_reach_optimality_mu_sum = th['time_to_reach_optimality'].mean(dim=['mu_diff', 'sigma_profit', 'k']).values
+    pivot_ratio_mu_sum = th['pivot_ratio'].mean(dim=['mu_p2m', 'sigma_profit', 'k']).values
+    reach_optimality_mu_sum = th['reach_optimality'].mean(dim=['mu_p2m', 'sigma_profit', 'k']).values
+    time_to_reach_optimality_mu_sum = th['time_to_reach_optimality'].mean(dim=['mu_p2m', 'sigma_profit', 'k']).values
 
-    pivot_ratio_sigma_profit = th['pivot_ratio'].mean(dim=['mu_diff', 'mu_sum', 'k']).values
-    reach_optimality_sigma_profit = th['reach_optimality'].mean(dim=['mu_diff', 'mu_sum', 'k']).values
-    time_to_reach_optimality_sigma_profit = th['time_to_reach_optimality'].mean(dim=['mu_diff', 'mu_sum', 'k']).values
+    pivot_ratio_sigma_profit = th['pivot_ratio'].mean(dim=['mu_p2m', 'mu_sum', 'k']).values
+    reach_optimality_sigma_profit = th['reach_optimality'].mean(dim=['mu_p2m', 'mu_sum', 'k']).values
+    time_to_reach_optimality_sigma_profit = th['time_to_reach_optimality'].mean(dim=['mu_p2m', 'mu_sum', 'k']).values
 
-    pivot_ratio_k = th['pivot_ratio'].mean(dim=['mu_diff', 'mu_sum', 'sigma_profit']).values
-    reach_optimality_k = th['reach_optimality'].mean(dim=['mu_diff', 'mu_sum', 'sigma_profit']).values
-    time_to_reach_optimality_k = th['time_to_reach_optimality'].mean(dim=['mu_diff', 'mu_sum', 'sigma_profit']).values
+    pivot_ratio_k = th['pivot_ratio'].mean(dim=['mu_p2m', 'mu_sum', 'sigma_profit']).values
+    reach_optimality_k = th['reach_optimality'].mean(dim=['mu_p2m', 'mu_sum', 'sigma_profit']).values
+    time_to_reach_optimality_k = th['time_to_reach_optimality'].mean(dim=['mu_p2m', 'mu_sum', 'sigma_profit']).values
 
-    plot_metrics(axs[0], mu_diff, pivot_ratio_mu_diff, reach_optimality_mu_diff, time_to_reach_optimality_mu_diff, 'mu_diff', 'Metrics by mu_diff')
+    plot_metrics(axs[0], mu_p2m, pivot_ratio_mu_p2m, reach_optimality_mu_p2m, time_to_reach_optimality_mu_p2m, 'mu_p2m', 'Metrics by mu_p2m')
     plot_metrics(axs[1], mu_sum, pivot_ratio_mu_sum, reach_optimality_mu_sum, time_to_reach_optimality_mu_sum, 'mu_sum', 'Metrics by mu_sum')
     plot_metrics(axs[2], sigma_profit, pivot_ratio_sigma_profit, reach_optimality_sigma_profit, time_to_reach_optimality_sigma_profit, 'sigma_profit', 'Metrics by sigma_profit')
     plot_metrics(axs[3], k, pivot_ratio_k, reach_optimality_k, time_to_reach_optimality_k, 'k', 'Metrics by k')
@@ -190,35 +190,35 @@ def plot_metrics(ax, x_values, pivot_ratio, reach_optimality, time_to_reach_opti
     
 def extract_mu_values_from_filename(filename):
     parts = filename.split('_')
-    mu_b_d_str = [p for p in parts if p.startswith('bB')][0]
-    mu_c_d_str = [p for p in parts if p.startswith('cC')][0]
-    mu_b_d = float(mu_b_d_str[2:].strip('[]').split()[0])
-    mu_c_d = float(mu_c_d_str[2:].strip('[]').split()[0])
-    return mu_b_d, mu_c_d
+    mu_p_d_str = [p for p in parts if p.startswith('B')][0]
+    mu_m_d_str = [p for p in parts if p.startswith('C')][0]
+    mu_p_d = float(mu_p_d_str[2:].strip('[]').split()[0])
+    mu_m_d = float(mu_m_d_str[2:].strip('[]').split()[0])
+    return mu_p_d, mu_m_d
 
 def plot_actions_and_ratios_from_nc_files(directory='data/experiment'):
     # Collect all .nc files in the directory
     nc_files = [f for f in os.listdir(directory) if f.endswith('.nc')]
     
     action_colors = {'pivot_product': 'green', 'pivot_market': 'purple', 'scale': 'red'}
-    mu_b_d_values = []
-    mu_c_d_values = []
+    mu_p_d_values = []
+    mu_m_d_values = []
     actions = []
 
-    # Read through each .nc file and extract mu_b_d, mu_c_d and actions
+    # Read through each .nc file and extract mu_p_d, mu_m_d and actions
     for nc_file in nc_files:
         file_path = os.path.join(directory, nc_file)
         ds = xr.open_dataset(file_path)
         
-        mu_b_d, mu_c_d = extract_mu_values_from_filename(nc_file)
-        mu_b_d_values.append(mu_b_d)
-        mu_c_d_values.append(mu_c_d)
+        mu_p_d, mu_m_d = extract_mu_values_from_filename(nc_file)
+        mu_p_d_values.append(mu_p_d)
+        mu_m_d_values.append(mu_m_d)
         actions.append(ds['action'].values)
     
     # Create subplots
     fig, ax = plt.subplots(2, 1, figsize=(12, 12))
 
-    # Plot for mu_b_d and mu_c_d (Ratio of pivot_product to pivot_market)
+    # Plot for mu_p_d and mu_m_d (Ratio of pivot_product to pivot_market)
     pivot_ratios = []
     for action_list in actions:
         pivot_product_count = sum(1 for action in action_list if action == 'pivot_product')
@@ -229,10 +229,10 @@ def plot_actions_and_ratios_from_nc_files(directory='data/experiment'):
             pivot_ratio = np.nan  # Avoid division by zero
         pivot_ratios.append(pivot_ratio)
 
-    scatter = ax[1].scatter(mu_b_d_values, mu_c_d_values, c=pivot_ratios, cmap='viridis', edgecolor='k', s=100)
-    ax[1].set_xlabel('mu_b_d')
-    ax[1].set_ylabel('mu_c_d')
-    ax[1].set_title('Ratio of pivot_product to pivot_market vs mu_b_d and mu_c_d')
+    scatter = ax[1].scatter(mu_p_d_values, mu_m_d_values, c=pivot_ratios, cmap='viridis', edgecolor='k', s=100)
+    ax[1].set_xlabel('mu_p_d')
+    ax[1].set_ylabel('mu_m_d')
+    ax[1].set_title('Ratio of pivot_product to pivot_market vs mu_p_d and mu_m_d')
     fig.colorbar(scatter, ax=ax[1], label='Pivot Product / Pivot Market Ratio')
     
     plt.tight_layout()
@@ -243,12 +243,11 @@ def plot_actions_and_ratios_from_nc_files(directory='data/experiment'):
 
 if __name__ == "__main__":
     th = brute_force()
-    # th = xr.open_dataset("data/theory/mu_diff-4.0to-4.0_mu_sum-4.0to0.0_B3_C1_s1.01.0_k1.0to1.0_T3_man_b2c.nc")
-    # th = xr.open_dataset("data/theory/mu_diff-4.0to-4.0_mu_sum-4.0to0.0_B3_C1_s1.03.0_k1.0to3.0_T3_man_b2c.nc")
+    # th = xr.open_dataset("data/theory/mu_p2m-4.0to-4.0_mu_sum-4.0to0.0_B3_C1_s1.01.0_k1.0to1.0_T3_man_b2c.nc")
     plot_theory_given_experiment(th)
     # test_hypothesis_1
     
-    # for mu_c_d in mu_c_d_range:
+    # for mu_m_d in mu_m_d_range:
 
     # for mu_a_d in mu_a_d_range:
 

From 64388fed258da4efdfc2151eedbeae3a4e67263e Mon Sep 17 00:00:00 2001
From: amoon <amoon@mit.edu>
Date: Fri, 21 Jun 2024 07:20:10 -0400
Subject: [PATCH 2/2] fixed bug (wrong predicted profitability in stan) and
 validated test_hypo

---
 simulation/fit_pm.py               | 72 ++++++++++++++----------------
 simulation/interact_tool.py        | 61 ++++++++++---------------
 simulation/stan/pivot_market.stan  | 12 ++---
 simulation/stan/pivot_product.stan | 12 ++---
 simulation/test_hypo.py            |  6 +--
 5 files changed, 72 insertions(+), 91 deletions(-)

diff --git a/simulation/fit_pm.py b/simulation/fit_pm.py
index 499feb7..a714b0d 100644
--- a/simulation/fit_pm.py
+++ b/simulation/fit_pm.py
@@ -1,4 +1,3 @@
-# fit_pm.py
 import numpy as np
 import xarray as xr
 import cmdstanpy
@@ -18,24 +17,24 @@ def sample_profit_obs(t, em):
     return mu2profit(em['mu_p_r'].item(), em['mu_m_r'].item(), em['product'][t].item(), em['market'][t].item())
 
 
-def decide_action(profit_obs, low_profit_b, high_profit_b, CR=.5):
+def decide_action(profit_obs, low_profit_b, high_profit_b, CR):
     """Decide the next action based on predicted and observed profit and cost considerations."""
 
-    if CR > 1:  #🐣 pivot_product cost > pivot_market cost
+    if CR > 1:  # pivot_product cost > pivot_market cost (deeptech, 🐣 biotech with core technology)
         if profit_obs > high_profit_b:
             return "scale"
         elif low_profit_b <= profit_obs <= high_profit_b:
-            return "pivot_product"
-        else: 
             return "pivot_market"
+        else: 
+            return "pivot_product" # angie won't abandon prob.comp unless sun rises from the west
 
-    else:  #🦖 pivot_product cost <= pivot_market cost
+    else:  # pivot_product cost <= pivot_market cost (it, 🦖big pharma with rigidity on distribution channel)
         if profit_obs > high_profit_b:
-            return "scale"
+            return "scale" 
         elif low_profit_b <= profit_obs <= high_profit_b:
-            return "pivot_market"
-        else: 
             return "pivot_product"
+        else: 
+            return "pivot_market"
 
 
 def mu2profit(mu_p, mu_m, product, market):
@@ -54,7 +53,7 @@ def predict_observe_update_as_lbs(t, em):
     # PREDICT BAL: B[t]|A[t], L[t]
     for p in em.coords['PD'].values:
         for m in em.coords['MK'].values:
-            em['profit_b'].loc[dict(PD=p, MK=m, ACT_PRED=t)] = mu2profit(em['mu_p_b'][t], em['mu_m_b'][t], p, m)
+            em['profit_b'].loc[dict(PD=p, MK=m, PRED=t)] = mu2profit(em['mu_p_b'][t], em['mu_m_b'][t], p, m)
     
     em['low_profit_b'][t] = em['profit_b'][e2p[em['product'][t].item()], e2m[em['market'][t].item()], t].item() - em['k_sigma'].item() * em['sigma_profit'][t].item()
     em['high_profit_b'][t] = em['profit_b'][e2p[em['product'][t].item()], e2m[em['market'][t].item()], t].item() + em['k_sigma'].item() * em['sigma_profit'][t].item()
@@ -72,10 +71,10 @@ def predict_observe_update_as_lbs(t, em):
     
     if em['action'][t] == "pivot_product":
         # UPDATE A ABC: A[t+1]| A[t], B[t], C[A[t]]            
-        # if t < em.dims['ACT_PRED']-1:
+        # if t < em.dims['PRED']-1:
         em['product'][t+1] = 'man' if em['product'][t].item() == 'ai' else 'ai'
         em['market'][t+1] = em['market'][t].item()
-        
+            
         # UPDATE L LBC: L[t+1]| L[t], B[t], C[A[t]] (store posterior, set next stage prior)
         data = {
             'product': e2p[em.product[t].item()] + 1,
@@ -97,10 +96,10 @@ def predict_observe_update_as_lbs(t, em):
 
     elif em['action'][t] == "pivot_market":
         # UPDATE A ABC: A[t+1]| A[t], B[t], C[A[t]]
-        # if t < em.dims['ACT_PRED']-1:
+        # if t < em.dims['PRED']-1:
         em['market'][t+1] = 'b2b' if em['market'][t].item() == 'b2c' else 'b2c'
         em['product'][t+1] = em['product'][t].item()
-        
+            
         # UPDATE L LBC: L[t+1]| L[t], B[t], C[A[t]] (store posterior, set next stage prior)
         data = {
             'product': e2p[em.product[t].item()] + 1,
@@ -121,14 +120,14 @@ def predict_observe_update_as_lbs(t, em):
         em['sigma_mu_m'][t+1] = em['mu_m_b_prior'][t+1].std()
 
     # use A[t+1] reparameterization structure for L[t]
-    em['profit_post'][t] = mu2profit(em['mu_m_b'][t+1], em['mu_p_b'][t+1], em['product'][t].item(), em['market'][t].item())
-    
+    # em['profit_post'][t] = mu2profit(em['mu_m_b'][t+1], em['mu_p_b'][t+1], em['product'][t].item(), em['market'][t].item())
     em['profit_prior'][t+1] = mu2profit(em['mu_p_b_prior'][t+1], em['mu_m_b_prior'][t+1], em['product'][t+1].item(), em['market'][t+1].item())
     em['sigma_profit'][t+1] = em['profit_prior'][t+1].std() # CHECKED em['sigma_profit'][t+1]**2 (.67) ~  (em['sigma_mu_p'][t+1]**2 + em['sigma_mu_m'][t+1]**2)/4 (.69)
+    
     return em
 
 
-def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='b2c'):
+def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='b2c', CR = 2):
     """
     Record the expected reward from the experiment given initial parameters.
     mu_p_d = (mu_sum * mu_p2m)/ (mu_p2m + 1)
@@ -137,14 +136,14 @@ def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='
     mu_p_d = mu_p_r - mu_p_b (=0) = mu_p_r # belief and goal differ but we treat both as _b
     mu_m_d = mu_m_r - mu_m_b (=0) = mu_m_r
     """
-    coords = {'H': np.arange(1), 'B': np.arange(T+1), 'ACT_PRED': np.arange(T), 'ACT_PVT': np.arange(T), 'PD': ["man", "ai"], 'MK': ["b2c", "b2b"], 'M': np.arange(4000)}
-    em_name = f"p2m-ratio{mu_p2m}_sum{mu_sum}_sigma{sigma_profit}_k-sigma{k_sigma}_exp{T}_{product}_{market}"
+    coords = {'H': np.arange(1), 'B': np.arange(T+1), 'PRED': np.arange(T), 'ACT': np.arange(T), 'PD': ["man", "ai"], 'MK': ["b2c", "b2b"], 'M': np.arange(4000)}
+    em_name = f"p2m-ratio{mu_p2m}_sum{mu_sum}_sigma{sigma_profit}_k-sigma{k_sigma}_exp{T}_CR{CR}_{product}_{market}"
     file_path = f"data/experiment/{em_name}.nc"
 
-    if os.path.exists(file_path):
-        em = xr.open_dataset(file_path)
-        print(f"File {em_name} already exists.")
-        return em
+    # if os.path.exists(file_path):
+    #     em = xr.open_dataset(file_path)
+    #     print(f"File {em_name} already exists.")
+    #     return em
 
     data_vars = {
         'mu_p_d': ('B', np.zeros(T+1)), 
@@ -156,27 +155,25 @@ def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='
         'sigma_mu_m':('B', np.zeros(T+1)),
 
         # PREDICT
-        'market': ('ACT_PRED', np.full(T, '', dtype='object')), #entrant randomly choose (same Eprofit currently (belief comes from future))
-        'product': ('ACT_PRED', np.full(T, '', dtype='object')),
+        'market': ('B', np.full(T+1, '', dtype='object')), #entrant randomly choose (same Eprofit currently (belief comes from future))
+        'product': ('B', np.full(T+1, '', dtype='object')),
 
-        'profit_b': (('PD', 'MK', 'ACT_PRED'), np.zeros((2, 2, T))),
-        'low_profit_b': ('ACT_PRED', np.zeros(T)),
-        'high_profit_b': ('ACT_PRED', np.zeros(T)),
+        'profit_b': (('PD', 'MK', 'PRED'), np.zeros((2, 2, T))),
+        'low_profit_b': ('PRED', np.zeros(T)),
+        'high_profit_b': ('PRED', np.zeros(T)),
 
         # OBSERVE
-        'profit_obs': ('ACT_PRED', np.zeros(T)),
+        'profit_obs': ('PRED', np.zeros(T)),
 
         # UPDATED BIT STATE
-        'profit_prior': (('ACT_PRED', 'M'), np.zeros((T,4000))),
-        'profit_post': (('ACT_PRED', 'M'), np.zeros((T,4000))),
+        'profit_prior': (('B', 'M'), np.zeros((T+1,4000))), # even if no more experiment opp, it updates belief (parameter)
+        # 'profit_post': (('PRED', 'M'), np.zeros((T,4000))),
 
         'mu_p_b_prior': (('B', 'M'), np.zeros((T+1,4000))), # updating belief is expensive - do only after observation
         'mu_m_b_prior': (('B', 'M'), np.zeros((T+1,4000))),
-        # 'mu_p_b_post': (('ACT_PRED', 'M'), np.zeros((T,4000))), # one lag
-        # 'mu_m_b_post': (('ACT_PRED', 'M'), np.zeros((T,4000))),
 
         # UPDATE ATOM STATE
-        'action': ('ACT_PVT', np.full(T, '', dtype='object')),
+        'action': ('ACT', np.full(T, '', dtype='object')),
         
         'mu_p_r': ('H', np.zeros(1)),
         'mu_m_r': ('H', np.zeros(1)), 
@@ -187,7 +184,7 @@ def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='
     em = xr.Dataset(data_vars=data_vars, coords=coords)
     em['profit_r'] = (('PD', 'MK'), np.zeros((len(em.coords['PD']), len(em.coords['MK']))))
 
-    for t in range(em.dims['ACT_PRED']-1):
+    for t in range(em.dims['PRED']): # t=0,1,2
         if t == 0:
             em['mu_p_r'][0] = (mu_sum * mu_p2m)/ (mu_p2m + 1)
             em['mu_m_r'][0] = mu_sum/ (mu_p2m + 1)
@@ -195,7 +192,7 @@ def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='
             em['mu_m_b'][0] = 0 
             em['mu_p_d'][0] = em['mu_p_r'][0]
             em['mu_m_d'][0] = em['mu_m_r'][0]
-            em['CR'][0] = .5 #cost_pivot_product / cost_pivot_market; 🐣CR=2, 🦖CR=.5
+            em['CR'][0] = CR #cost_pivot_product / cost_pivot_market; 🐣CR=2, 🦖CR=.5
 
             em['sigma_mu_p'][0] = sigma_profit * np.sqrt(2)
             em['sigma_mu_m'][0] = sigma_profit * np.sqrt(2)
@@ -222,5 +219,4 @@ def experiment(mu_p2m, mu_sum, sigma_profit, k_sigma, T, product='man', market='
     return em
 
 if __name__ == "__main__":     
-    # Start the experiment
-    em = experiment(mu_p2m = 3, mu_sum = 4, sigma_profit = 1, k_sigma=2,  T=2)
\ No newline at end of file
+    em = experiment(mu_p2m = 3, mu_sum = 4, sigma_profit=1, k_sigma=3, T=2, product = 'man', market = 'b2c')
\ No newline at end of file
diff --git a/simulation/interact_tool.py b/simulation/interact_tool.py
index 94d359f..0e2ac27 100644
--- a/simulation/interact_tool.py
+++ b/simulation/interact_tool.py
@@ -69,9 +69,12 @@ def plot_layer0_belief(em):
     # plt.show()
     plt.close()
 
+import matplotlib.pyplot as plt
+import numpy as np
+
 def plot_layer1_profit(em):
     cell_colors = {'ai-b2c': 'green', 'man-b2c': 'gray', 'man-b2b': 'purple', 'ai-b2b': 'orange'}
-    ACT_PVT_markers = {'pivot_product': 'B', 'pivot_market': 'M', 'scale': 'S'}
+    ACT_markers = {'pivot_product': 'B', 'pivot_market': 'M', 'scale': 'S'}
 
     # Create subplots
     num_exp = np.where(em['action'].values == "scale")[0][0] + 1 if "scale" in em['action'].values else em['action'].shape[0]
@@ -82,8 +85,8 @@ def plot_layer1_profit(em):
     axs = np.empty((2, num_exp), dtype=object)
     
     for col in range(num_exp):
-        axs[0, col] = fig.add_subplot(gs[0, col * 2:(col + 1) * 2]) # Second row
-        axs[1, col] = fig.add_subplot(gs[1, col * 2:(col + 1) * 2], projection='3d')
+        axs[0, col] = fig.add_subplot(gs[0, col * 2:(col + 1) * 2]) # First row
+        axs[1, col] = fig.add_subplot(gs[1, col * 2:(col + 1) * 2]) # Second row
 
     fig.suptitle(em['em_name'].item())
 
@@ -94,20 +97,19 @@ def plot_layer1_profit(em):
         p = em['product'][t].item()
         m = em['market'][t].item()
         pm_idx = f'{p}-{m}'
-        act_pvt = em['action'][t].item()
+        ACT = em['action'][t].item()
 
         # Posterior distributions
         profit_b_prior = em['profit_prior'][t].values
-        profit_b_posterior = em['profit_post'][t].values
+        # profit_b_posterior = em['profit_post'][t].values
         
         axs[0, t].hist(profit_b_prior, bins=30, alpha=0.1, color=cell_colors[pm_idx], edgecolor='white')
-        axs[0, t].hist(profit_b_posterior, bins=30, alpha=.2, color=cell_colors[pm_idx], edgecolor='white')
+        # axs[0, t].hist(profit_b_posterior, bins=30, alpha=.4, color=cell_colors[pm_idx], edgecolor='white')
         axs[0, t].axvline(x=em['profit_obs'][t].item(), color=cell_colors[pm_idx], linestyle='-', label='observed profit')
-        axs[0, t].axvline(x=profit_b_prior.mean(), color=cell_colors[pm_idx], linestyle='--', label=f'predicted in {p}-{m}')
-        axs[0, t].axvline(x=low_profit_b[t], color=cell_colors[pm_idx], linestyle='-.', label='Low profit bar')
-        axs[0, t].axvline(x=high_profit_b[t], color=cell_colors[pm_idx], linestyle='-.', label='High profit bar')
+        axs[0, t].axvline(x=low_profit_b[t], color=cell_colors[pm_idx], linestyle='--', linewidth=4, label='Low profit bar')
+        axs[0, t].axvline(x=high_profit_b[t], color=cell_colors[pm_idx], linestyle='--', linewidth=4, label='High profit bar')
         
-        axs[0, t].set_title(f'EXPERIMENT {t+1}:\n ENV {pm_idx}, ACTION {act_pvt}->')
+        axs[0, t].set_title(f'EXPERIMENT {t+1}:\n ENV {pm_idx}, ACTION {ACT}      ->')
         axs[0, t].legend(prop={'size':15})
         axs[0, t].set_xlim(-3, 3)
         axs[0, t].set_ylim(0, 500)
@@ -121,44 +123,27 @@ def plot_layer1_profit(em):
         x_positions = np.array([0, 0, 1, 1])  # Example layout
         y_positions = np.array([0, 1, 0, 1])
 
-        # Assuming em['profit_b'] and em['profit_r'] are structured to allow indexing by ACT_PRED, PD, and MK
+        # Assuming em['profit_b'] and em['profit_r'] are structured to allow indexing by PRED, PD, and MK
         for idx, (pd, mk) in enumerate(pd_mk_combinations):
-            z_position_b = em['profit_b'].loc[dict(PD=pd, MK=mk, ACT_PRED=t)]
+            z_position_b = em['profit_b'].loc[dict(PD=pd, MK=mk, PRED=t)]
             z_position_t = em['profit_r'].loc[dict(PD=pd, MK=mk)]
 
             # Determine the color based on PD and MK
             label_index = f'{pd}-{mk}'
             color = cell_colors[label_index]
 
-            ax.quiver(x_positions[idx], y_positions[idx], z_position_b, 
-                    0, 0, z_position_t - z_position_b, 
-                    arrow_length_ratio=0.1, color=color)
-            ax.scatter(x_positions[idx], y_positions[idx], z_position_b, color='black', s=50, facecolors='none')
-            ax.scatter(x_positions[idx], y_positions[idx], z_position_t, color=color, s=50)
-            ax.text(x_positions[idx], y_positions[idx], z_position_b, f'{z_position_b:.2f}', color='black', ha='left', va='bottom', fontsize=10)
-            ax.text(x_positions[idx], y_positions[idx], z_position_t, f'{z_position_t:.2f}', color='black', ha='right', va='top', fontsize=10)
-
-        colors = np.array([['gray','purple'], ['green', 'orange']]) 
-        x = np.linspace(0, 1, 3)
-        y = np.linspace(0, 1, 3)
-        X, Y = np.meshgrid(x, y)
-        Z = np.zeros_like(X)  # Plane at z=0
-        # Plot each quadrant with different color
-        for i in range(2):
-            for j in range(2):
-                ax.plot_surface(X[i:i+2, j:j+2], Y[i:i+2, j:j+2], Z[i:i+2, j:j+2],
-                                color=colors[i][j], alpha=.3, rstride=1, cstride=1)
-        ax.xaxis.pane.fill = False
-        ax.yaxis.pane.fill = False
-        ax.zaxis.pane.fill = False
+            # Plotting 2D instead of 3D
+            ax.scatter(x_positions[idx], z_position_b, color='black', s=50, facecolors='none')
+            ax.scatter(x_positions[idx], z_position_t, color=color, s=50)
+            ax.text(x_positions[idx], z_position_t, f'({z_position_b:.2f}, ', color='black', ha='left', va='bottom', fontsize=10)
+            ax.text(x_positions[idx], z_position_t, f'{z_position_t:.2f})', color='black', ha='left', va='bottom', fontsize=10, fontweight='bold')
+
         ax.set_facecolor('white')
         ax.grid(False)
         ax.set_xticks([])
         ax.set_yticks([])
-        ax.set_zticks(np.arange(-2, 3, 1))
-        ax.set_xticklabels([])
-        ax.set_yticklabels([])
-        ax.set_zticklabels(['', '', '', '', ''])
+        ax.set_xlim(-1, 2)
+        ax.set_ylim(-2, 3)
         ax.axis('off')
 
     # Save the figure
@@ -220,7 +205,7 @@ def on_value_change(change):
     # th = xr.open_dataset("data/th/bB[-0.2  0.   0.2]_cC[-0.2  0.   0.2]_a[0.4]_b0.2_c0.1_s0.1_cash4_E5_man_b2c")
     # plot_th_given_experiment(th)
     
-    em = experiment(mu_p2m = 3, mu_sum = 4, sigma_profit=1, k_sigma=2, T=2, product = 'man', market = 'b2c')
+    em = experiment(mu_p2m = 3, mu_sum = 4, sigma_profit=1, k_sigma=1, T=3, product = 'man', market = 'b2c')
     # em = xr.open_dataset("data/experiment/bB-0.2_cC-0.2_B0.3_C0.3_a0.1_s0.1_T10_man_b2c.nc")
     # plot_layer0_belief(em)
     plot_layer1_profit(em)
\ No newline at end of file
diff --git a/simulation/stan/pivot_market.stan b/simulation/stan/pivot_market.stan
index 5834bbf..172eb6a 100644
--- a/simulation/stan/pivot_market.stan
+++ b/simulation/stan/pivot_market.stan
@@ -2,20 +2,20 @@ data {
   real profit_obs; // Observations from sample_profit_obs function
   int<lower=1> market; // Market index
   int<lower=1> product; // Product index
-  real mu_c_b_mean;
-  real mu_b_b; // Kept constant during updates in this model
+  real mu_m_b_mean;
+  real mu_p_b; // Kept constant during updates in this model
   // real sigma_obs;
 }
 
 
 parameters {
-  real mu_c_b;
+  real mu_m_b;
   real sigma_obs;
 }
 
 model {
   sigma_obs ~ exponential(1);
-  mu_c_b ~ normal(mu_c_b_mean, 1);
-  // Model uses fixed mu_b_b and updates mu_a and mu_c_b
-  profit_obs ~ normal(pow(-1, product) * mu_b_b + pow(-1, market) * mu_c_b, sigma_obs); 
+  mu_m_b ~ normal(mu_m_b_mean, 1);
+  // Model uses fixed mu_p_b and updates mu_a and mu_m_b
+  profit_obs ~ normal(pow(-1, product) * mu_p_b/2 + pow(-1, market) * mu_m_b/2, sigma_obs); 
 }
diff --git a/simulation/stan/pivot_product.stan b/simulation/stan/pivot_product.stan
index 6b31868..a65e0aa 100644
--- a/simulation/stan/pivot_product.stan
+++ b/simulation/stan/pivot_product.stan
@@ -2,19 +2,19 @@ data {
   real profit_obs; // Observations from sample_profit_obs function
   int<lower=1> market; // Market index
   int<lower=1> product; // Product index
-  real mu_b_b_mean;
-  real mu_c_b; // Kept constant during updates in this model
+  real mu_p_b_mean;
+  real mu_m_b; // Kept constant during updates in this model
   // real sigma_obs;
 }
 
 parameters {
-  real mu_b_b;
+  real mu_p_b;
   real sigma_obs;
 }
 
 model {
   sigma_obs ~ exponential(1);
-  mu_b_b ~ normal(mu_b_b_mean, 1);
-  // Model uses fixed mu_c_b and updates mu_a and mu_b_b
-  profit_obs ~ normal(pow(-1, product) * mu_b_b + pow(-1, market) * mu_c_b, sigma_obs); 
+  mu_p_b ~ normal(mu_p_b_mean, 1);
+  // Model uses fixed mu_m_b and updates mu_a and mu_p_b
+  profit_obs ~ normal(pow(-1, product) * mu_p_b/2 + pow(-1, market) * mu_m_b/2, sigma_obs); 
 }
diff --git a/simulation/test_hypo.py b/simulation/test_hypo.py
index 10adf7d..11e88d1 100644
--- a/simulation/test_hypo.py
+++ b/simulation/test_hypo.py
@@ -24,7 +24,7 @@
 # TODO out: ratio of cells whose mu_p, mu_m exceed lowbar (b or r)??  
 
 # def brute_force(mu_p2m_range=np.linspace(-2, -2, 1), mu_sum_range=np.linspace(-4, 0, 2), sigma_profit_range = np.linspace(1, 2, 10), T=2,  product='man', market='b2c'):
-def brute_force(mu_p2m_range=np.linspace(.3, 3, 5), mu_sum_range=np.linspace(1, 4, 5), sigma_profit_range = np.linspace(.5, 2, 3), k_sigma_range =np.linspace(.5, 2, 3), T=2,  product='man', market='b2c'):    
+def brute_force(mu_p2m_range=np.linspace(.3, 3, 5), mu_sum_range=np.linspace(1, 4, 5), sigma_profit_range = np.linspace(.5, 1, 3), k_sigma_range =np.linspace(.5, 2, 3), T=3,  product='man', market='b2c'):    
     th_name = f"p2m{mu_p2m_range[0]}to{mu_p2m_range[-1]}l{len(mu_p2m_range)}_sum{mu_sum_range[0]}to{mu_sum_range[-1]}l{len(mu_sum_range)}_s{sigma_profit_range[0]}{sigma_profit_range[-1]}_k{k_sigma_range[0]}to{k_sigma_range[-1]}l{len(sigma_profit_range)}_T{T}_{product}_{market}"
     file_path = f"data/theory/{th_name}.nc"
     if os.path.exists(file_path):
@@ -80,7 +80,7 @@ def compute_pivot_ratio(em):
             pivot_market_count += 1
 
     # if pivot_market_count == 0:
-    #     return em.dims['ACT_PRED'] * 2 # Avoid division by zero
+    #     return em.dims['PRED'] * 2 # Avoid division by zero
     return (pivot_product_count+1) / (pivot_market_count + 1)
 
 def compute_reach_optimality(em):
@@ -100,7 +100,7 @@ def compute_time_to_reach_optimality(em):
         if product == 'ai' and market == 'b2b':
             return idx  # Return the first index where the condition is met
 
-    return em.dims['ACT_PRED'] * 2 # Return this value if the condition is never met
+    return em.dims['PRED'] * 2 # Return this value if the condition is never met
 
 
 def compute_sequence(em, sequence_length=3):