diff --git a/tests/backends/test_arviz.py b/tests/backends/test_arviz.py index 85c1d9915c..fe1906ec9c 100644 --- a/tests/backends/test_arviz.py +++ b/tests/backends/test_arviz.py @@ -19,15 +19,14 @@ import pytest import xarray -from arviz import InferenceData -from arviz.tests.helpers import check_multiple_attrs +from arviz_base.testing import check_multiple_attrs from numpy import ma from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 import pymc as pm from pymc.backends.arviz import ( - InferenceDataConverter, + DataTreeConverter, dataset_to_point_list, predictions_to_inference_data, to_inference_data, @@ -110,7 +109,7 @@ def get_inference_data(self, data, eight_schools_params): def get_predictions_inference_data( self, data, eight_schools_params, inplace - ) -> tuple[InferenceData, dict[str, np.ndarray]]: + ) -> tuple[xarray.DataTree, dict[str, np.ndarray]]: with data.model: prior = pm.sample_prior_predictive(return_inferencedata=False) posterior_predictive = pm.sample_posterior_predictive( @@ -123,17 +122,17 @@ def get_predictions_inference_data( coords={"school": np.arange(eight_schools_params["J"])}, dims={"theta": ["school"], "eta": ["school"]}, ) - assert isinstance(idata, InferenceData) + assert isinstance(idata, xarray.DataTree) extended = predictions_to_inference_data( posterior_predictive, idata_orig=idata, inplace=inplace ) - assert isinstance(extended, InferenceData) + assert isinstance(extended, xarray.DataTree) assert (id(idata) == id(extended)) == inplace return (extended, posterior_predictive) def make_predictions_inference_data( self, data, eight_schools_params - ) -> tuple[InferenceData, dict[str, np.ndarray]]: + ) -> tuple[xarray.DataTree, dict[str, np.ndarray]]: with data.model: posterior_predictive = pm.sample_posterior_predictive( data.obj, return_inferencedata=False @@ -144,7 +143,7 @@ def make_predictions_inference_data( coords={"school": np.arange(eight_schools_params["J"])}, dims={"theta": ["school"], "eta": ["school"]}, ) - assert isinstance(idata, InferenceData) + assert isinstance(idata, xarray.DataTree) return idata, posterior_predictive def test_to_idata(self, data, eight_schools_params, chains, draws): @@ -166,7 +165,7 @@ def test_to_idata(self, data, eight_schools_params, chains, draws): assert inference_data.log_likelihood["obs"].shape == (chains, draws, *obs.shape) def test_predictions_to_idata(self, data, eight_schools_params): - "Test that we can add predictions to a previously-existing InferenceData." + "Test that we can add predictions to a previously-existing xarray.DataTree." test_dict = { "posterior": ["mu", "tau", "eta", "theta"], "sample_stats": ["diverging", "lp"], @@ -236,7 +235,7 @@ def test_posterior_predictive_thinned(self, data): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) idata = pm.sample(tune=5, draws=draws, chains=2, return_inferencedata=True) thinned_idata = idata.sel(draw=slice(None, None, thin_by)) - idata.extend(pm.sample_posterior_predictive(thinned_idata)) + idata.update(pm.sample_posterior_predictive(thinned_idata)) test_dict = { "posterior": ["mu", "tau", "eta", "theta"], "sample_stats": ["diverging", "lp", "~log_likelihood"], @@ -639,7 +638,12 @@ def test_constant_data_coords_issue_5046(self): assert len(data[k].shape) == len(dims[k]) ds = pm.backends.arviz.dict_to_dataset( - data=data, library=pm, coords=coords, dims=dims, default_dims=[], index_origin=0 + data=data, + inference_library=pm, + coords=coords, + dims=dims, + sample_dims=[], + index_origin=0, ) for dname, cvals in coords.items(): np.testing.assert_array_equal(ds[dname].values, cvals) @@ -661,14 +665,14 @@ def test_issue_5043_autoconvert_coord_values(self): ) # The converter must convert coord values them to numpy arrays # because tuples as coordinate values causes problems with xarray. - converter = InferenceDataConverter(trace=mtrace) + converter = DataTreeConverter(trace=mtrace) assert isinstance(converter.coords["city"], np.ndarray) converter.to_inference_data() # We're not automatically converting things other than tuple, - # so advanced use cases remain supported at the InferenceData level. + # so advanced use cases remain supported at the DataTree level. # They just can't be used in the model construction already. - converter = InferenceDataConverter( + converter = DataTreeConverter( trace=mtrace, coords={ "city": pd.MultiIndex.from_tuples( @@ -862,11 +866,13 @@ def test_incompatible_coordinate_lengths(): "Incompatible coordinate length of 3 for dimension 'a' of variable 'y'" ), ): - prior = pm.sample_prior_predictive(draws=1).prior.squeeze(("chain", "draw")) + prior = ( + pm.sample_prior_predictive(draws=1).prior.to_dataset().squeeze(("chain", "draw")) + ) assert prior.x.dims == prior.y.dims == ("a",) assert prior.x.shape == prior.y.shape == (3,) assert np.isnan(prior.y.values[-1]) - assert list(prior.coords["a"]) == [0, 1, 2] + assert list(prior.coords["a"]) == [-1, -2, -3] pm.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS = True with pytest.raises(ValueError): diff --git a/tests/backends/test_zarr.py b/tests/backends/test_zarr.py index af9c9e0a06..ce1cacfe9e 100644 --- a/tests/backends/test_zarr.py +++ b/tests/backends/test_zarr.py @@ -20,8 +20,6 @@ import xarray as xr import zarr -from arviz import InferenceData - import pymc as pm from pymc.backends.zarr import ZarrTrace @@ -436,7 +434,7 @@ def test_sample( assert isinstance(out_trace, ZarrTrace) assert out_trace.root.store is trace.root.store else: - assert isinstance(out_trace, InferenceData) + assert isinstance(out_trace, xr.DataTree) expected_groups = {"posterior", "constant_data", "observed_data", "sample_stats"} if include_transformed: diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py index 84ad396b1c..d131f82e98 100644 --- a/tests/gp/test_hsgp_approx.py +++ b/tests/gp/test_hsgp_approx.py @@ -215,8 +215,8 @@ def test_prior(self, model, cov_func, X1, parametrization, rng): idata = pm.sample_prior_predictive(draws=1000, random_seed=rng) - samples1 = az.extract(idata.prior["f1"])["f1"].values.T - samples2 = az.extract(idata.prior["f2"])["f2"].values.T + samples1 = az.extract(idata.prior["f1"]).values.T + samples2 = az.extract(idata.prior["f2"]).values.T h0, mmd, critical_value, reject = two_sample_test( samples1, samples2, n_sims=500, alpha=0.01 @@ -242,8 +242,8 @@ def test_conditional(self, model, cov_func, X1, parametrization): idata = pm.sample_prior_predictive(draws=1000) - samples1 = az.extract(idata.prior["f"])["f"].values.T - samples2 = az.extract(idata.prior["fc"])["fc"].values.T + samples1 = az.extract(idata.prior["f"]).values.T + samples2 = az.extract(idata.prior["fc"]).values.T h0, mmd, critical_value, reject = two_sample_test( samples1, samples2, n_sims=500, alpha=0.01 @@ -302,8 +302,8 @@ def test_prior(self, model, cov_func, eta, X1, rng): idata = pm.sample_prior_predictive(draws=1000, random_seed=rng) - samples1 = az.extract(idata.prior["f1"])["f1"].values.T - samples2 = az.extract(idata.prior["f2"])["f2"].values.T + samples1 = az.extract(idata.prior["f1"]).values.T + samples2 = az.extract(idata.prior["f2"]).values.T h0, mmd, critical_value, reject = two_sample_test( samples1, samples2, n_sims=500, alpha=0.01 @@ -323,8 +323,8 @@ def test_conditional_periodic(self, model, cov_func, X1): idata = pm.sample_prior_predictive(draws=1000) - samples1 = az.extract(idata.prior["f"])["f"].values.T - samples2 = az.extract(idata.prior["fc"])["fc"].values.T + samples1 = az.extract(idata.prior["f"]).values.T + samples2 = az.extract(idata.prior["fc"]).values.T h0, mmd, critical_value, reject = two_sample_test( samples1, samples2, n_sims=500, alpha=0.01 diff --git a/tests/model/test_core.py b/tests/model/test_core.py index 5e5b7ecb7d..5208b552b2 100644 --- a/tests/model/test_core.py +++ b/tests/model/test_core.py @@ -227,7 +227,7 @@ def test_nested_model_to_netcdf(self, tmp_path): with pm.Model("scope") as model: b = pm.Normal("var") trace = pm.sample(100, tune=0) - az.to_netcdf(trace, tmp_path / "trace.nc") + trace.to_netcdf(tmp_path / "trace.nc") trace1 = az.from_netcdf(tmp_path / "trace.nc") assert "scope::var" in trace1.posterior @@ -1430,8 +1430,10 @@ def test_interval_missing_observations(self): np.testing.assert_array_equal(trace["theta2"][0][~obs2.mask], obs1[~obs2.mask]) pp_idata = pm.sample_posterior_predictive(trace, random_seed=rng) - pp_trace = pp_idata.posterior_predictive.stack(sample=["chain", "draw"]).transpose( - "sample", ... + pp_trace = ( + pp_idata.posterior_predictive.to_dataset() + .stack(sample=["chain", "draw"]) + .transpose("sample", ...) ) assert set(pp_trace.keys()) == { "theta1", diff --git a/tests/model/transform/test_conditioning.py b/tests/model/transform/test_conditioning.py index fa9ce71246..6369a68047 100644 --- a/tests/model/transform/test_conditioning.py +++ b/tests/model/transform/test_conditioning.py @@ -159,9 +159,11 @@ def test_do_posterior_predictive(): # Dummy posterior idata_m = az.from_dict( { - "x": np.full((2, 500), 25), - "y": np.full((2, 500), np.nan), - "z": np.full((2, 500), np.nan), + "posterior": { + "x": np.full((2, 500), 25), + "y": np.full((2, 500), np.nan), + "z": np.full((2, 500), np.nan), + } } ) @@ -293,7 +295,9 @@ def test_do_sample_posterior_predictive(make_interventions_shared): b = pm.Deterministic("b", a * 2) c = pm.Normal("c", b / 2) - idata = az.from_dict({"a": [[1.0]], "b": [[2.0]], "c": [[1.0]]}) + idata = az.from_dict( + {"posterior": {"a": np.array([[1.0]]), "b": np.array([[2.0]]), "c": np.array([[1.0]])}} + ) with do(model, {a: 1000}, make_interventions_shared=make_interventions_shared): pp = sample_posterior_predictive(idata, var_names=["c"], predictions=True).predictions diff --git a/tests/sampling/test_forward.py b/tests/sampling/test_forward.py index 784e16339b..15d9397307 100644 --- a/tests/sampling/test_forward.py +++ b/tests/sampling/test_forward.py @@ -22,9 +22,8 @@ import pytest import xarray as xr -from arviz import InferenceData -from arviz import from_dict as az_from_dict -from arviz.tests.helpers import check_multiple_attrs +from arviz_base import from_dict as az_from_dict +from arviz_base.testing import check_multiple_attrs from pytensor import Mode, shared from pytensor.compile import SharedVariable from pytensor.graph import graph_inputs @@ -441,7 +440,7 @@ def test_length_coords_volatile(self): # Same coord length -- `x` is not volatile trace_same_len = az_from_dict( - posterior={"x": [[[np.pi] * 3]]}, + {"posterior": {"x": np.array([[[np.pi] * 3]])}}, coords={"trial": range(3)}, dims={"x": ["trial"]}, ) @@ -449,19 +448,18 @@ def test_length_coords_volatile(self): pp_same_len = pm.sample_posterior_predictive( trace_same_len, var_names=["y"] ).posterior_predictive - assert pp_same_len["y"] == np.pi + assert pp_same_len["y"].values.item() == np.pi # Coord length changed -- `x` is volatile trace_diff_len = az_from_dict( - posterior={"x": [[[np.pi] * 2]]}, + {"posterior": {"x": np.array([[[np.pi] * 2]])}}, coords={"trial": range(2)}, - dims={"x": ["trial"]}, ) with model: pp_diff_len = pm.sample_posterior_predictive( trace_diff_len, var_names=["y"] ).posterior_predictive - assert pp_diff_len["y"] != np.pi + assert pp_diff_len["y"].values.item() != np.pi # Changing the dim length on the model itself # -- `x` is volatile because trace has same len as original model @@ -470,7 +468,7 @@ def test_length_coords_volatile(self): pp_diff_len_model_set = pm.sample_posterior_predictive( trace_same_len, var_names=["y"] ).posterior_predictive - assert pp_diff_len_model_set["y"] != np.pi + assert pp_diff_len_model_set["y"].values.item() != np.pi class TestSamplePPC: @@ -497,7 +495,7 @@ def test_normal_scalar(self): assert len(ppc) == 0 # test empty ppc with extend_inferencedata - assert isinstance(trace, InferenceData) + assert isinstance(trace, xr.DataTree) ppc = pm.sample_posterior_predictive(trace, var_names=[], extend_inferencedata=True) assert ppc is trace @@ -534,12 +532,12 @@ def test_normal_scalar_idata(self): discard_tuned_samples=False, ) - assert not isinstance(trace, InferenceData) + assert not isinstance(trace, xr.DataTree) with model: # test keep_size parameter and idata input idata = pm.to_inference_data(trace) - assert isinstance(idata, InferenceData) + assert isinstance(idata, xr.DataTree) ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False) assert ppc["a"].shape == (nchains, ndraws) @@ -587,12 +585,12 @@ def test_normal_vector_idata(self): a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) trace = pm.sample(return_inferencedata=False) - assert not isinstance(trace, InferenceData) + assert not isinstance(trace, xr.DataTree) with model: # test keep_size parameter with inference data as input... idata = pm.to_inference_data(trace) - assert isinstance(idata, InferenceData) + assert isinstance(idata, xr.DataTree) ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False) assert ppc["a"].shape == (trace.nchains, len(trace), 2) @@ -783,7 +781,7 @@ def test_potentials_warning(self): p = pm.Potential("p", a + 1) obs = pm.Normal("obs", a, 1, observed=5) - trace = az_from_dict({"a": np.random.rand(5)}) + trace = az_from_dict({"posterior": {"a": np.random.rand(1, 5)}}) with m: with pytest.warns(UserWarning, match=warning_msg): pm.sample_posterior_predictive(trace) @@ -886,7 +884,9 @@ def test_logging_sampled_basic_rvs_posterior(self, caplog): y = pm.Normal("y", x_det) z = pm.Normal("z", y, observed=0) - idata = az_from_dict(posterior={"x": np.zeros(5), "x_det": np.ones(5), "y": np.ones(5)}) + idata = az_from_dict( + {"posterior": {"x": np.zeros((1, 5)), "x_det": np.ones((1, 5)), "y": np.ones((1, 5))}} + ) with m: pm.sample_posterior_predictive(idata) assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [z]")] @@ -907,21 +907,21 @@ def test_logging_sampled_basic_rvs_posterior(self, caplog): # Missing deterministic `x_det` does not show in the log, even if it is being # recomputed, only `y` RV shows - idata = az_from_dict(posterior={"x": np.zeros(5)}) + idata = az_from_dict({"posterior": {"x": np.zeros((1, 5))}}) with m: pm.sample_posterior_predictive(idata) assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [y, z]")] caplog.clear() # Missing deterministic `x_det` does not cause recomputation of downstream `y` RV - idata = az_from_dict(posterior={"x": np.zeros(5), "y": np.ones(5)}) + idata = az_from_dict({"posterior": {"x": np.zeros((1, 5)), "y": np.ones((1, 5))}}) with m: pm.sample_posterior_predictive(idata) assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [z]")] caplog.clear() # Missing `x` causes sampling of downstream `y` RV, even if it is present in trace - idata = az_from_dict(posterior={"y": np.ones(5)}) + idata = az_from_dict({"posterior": {"y": np.ones((1, 5))}}) with m: pm.sample_posterior_predictive(idata) assert caplog.record_tuples == [ @@ -938,7 +938,9 @@ def test_logging_sampled_basic_rvs_posterior_deterministic(self, caplog): # Explicit resampling a deterministic will lead to resampling of downstream RV `y` # This behavior could change in the future as the posterior of `y` is still valid - idata = az_from_dict(posterior={"x": np.zeros(5), "x_det": np.ones(5), "y": np.ones(5)}) + idata = az_from_dict( + {"posterior": {"x": np.zeros((1, 5)), "x_det": np.ones((1, 5)), "y": np.ones((1, 5))}} + ) with m: pm.sample_posterior_predictive(idata, var_names=["x_det", "z"]) assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [y, z]")] @@ -979,7 +981,7 @@ def mock_multitrace(self): ) return trace - @pytest.fixture(scope="class", params=["MultiTrace", "InferenceData", "Dataset"]) + @pytest.fixture(scope="class", params=["MultiTrace", "DataTree", "Dataset"]) def mock_sample_results(self, request, mock_multitrace): kind = request.param trace = mock_multitrace @@ -1012,8 +1014,8 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results, ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]") ] caplog.clear() - elif kind == "InferenceData": - # InferenceData has all MCMC posterior samples and the values for both coordinates and + elif kind == "DataTree": + # DataTree has all MCMC posterior samples and the values for both coordinates and # data containers. This enables it to see that no data has changed and it should only # resample the observed variable assert caplog.record_tuples == [ @@ -1031,7 +1033,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results, original_offsets = model["offsets"].get_value() with model: - # Changing the Data values. This will only be picked up by InferenceData + # Changing the Data values. This will only be picked up by DataTree pm.set_data({"offsets": original_offsets + 1}) pm.sample_posterior_predictive(samples) if kind == "MultiTrace": @@ -1039,7 +1041,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results, ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]") ] caplog.clear() - elif kind == "InferenceData": + elif kind == "DataTree": assert caplog.record_tuples == [ ("pymc.sampling.forward", logging.INFO, "Sampling: [b, y]") ] @@ -1051,7 +1053,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results, caplog.clear() with model: - # Changing the mutable coordinates. This will be picked up by InferenceData and Dataset + # Changing the mutable coordinates. This will be picked up by DataTree and Dataset model.set_dim("name", new_length=4, coord_values=["D", "E", "F", "G"]) pm.set_data({"offsets": original_offsets, "y_obs": np.zeros((10, 4))}) pm.sample_posterior_predictive(samples) @@ -1060,7 +1062,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results, ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]") ] caplog.clear() - elif kind == "InferenceData": + elif kind == "DataTree": assert caplog.record_tuples == [ ("pymc.sampling.forward", logging.INFO, "Sampling: [a, sigma, y]") ] @@ -1082,7 +1084,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results, ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]") ] caplog.clear() - elif kind == "InferenceData": + elif kind == "DataTree": assert caplog.record_tuples == [ ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]") ] @@ -1106,7 +1108,7 @@ def test_observed_data_needed_in_pp(self): prior = pm.sample_prior_predictive(draws=25).prior - fake_idata = InferenceData(posterior=prior) + fake_idata = az_from_dict({"posterior": prior}) new_coords = {"trial": range(2), "feature": range(3)} new_x_data = np.random.normal(size=(2, 3)) @@ -1130,7 +1132,7 @@ def test_observed_data_needed_in_pp(self): prior = pm.sample_prior_predictive(draws=25).prior - fake_idata = InferenceData(posterior=prior) + fake_idata = az_from_dict({"posterior": prior}) with m: pm.set_data({"x_data": new_x_data}, coords=new_coords) @@ -1407,7 +1409,7 @@ def test_pytensor_function_kwargs(self): y = pm.Deterministic("y", x + sharedvar) pp = pm.sample_posterior_predictive( - trace=az_from_dict({"x": np.arange(5)}), + trace=az_from_dict({"posterior": {"x": np.arange(5).reshape(1, 5)}}), var_names=["y"], return_inferencedata=False, compile_kwargs={ @@ -1421,7 +1423,9 @@ def test_pytensor_function_kwargs(self): def test_sample_dims(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture with pmodel: - post = pm.to_inference_data(trace).posterior.stack(sample=["chain", "draw"]) + post = ( + pm.to_inference_data(trace).posterior.to_dataset().stack(sample=["chain", "draw"]) + ) pp = pm.sample_posterior_predictive(post, var_names=["d"], sample_dims=["sample"]) assert "sample" in pp.posterior_predictive assert len(pp.posterior_predictive["sample"]) == len(post["sample"]) @@ -1846,7 +1850,7 @@ def model_to_vectorize(has_nested_random_variables): with model: idata = pm.sample_prior_predictive(100) - idata.add_groups({"posterior": idata.prior}) + idata.update({"posterior": idata.prior}) return freeze_dims_and_data(model), idata @@ -1939,7 +1943,7 @@ def test_vectorize_over_posterior_matches_sample(): ) } ) - idata = InferenceData(posterior=posterior) + idata = az_from_dict({"posterior": posterior}) with model: pp = pm.sample_posterior_predictive(idata, var_names=["obs", "det"], random_seed=1234) vectorized = vectorize_over_posterior( @@ -1967,7 +1971,7 @@ def test_vectorize_over_posterior_with_intermediate_rvs(): c = b + 1 d = pm.Normal.dist(c) idata = pm.sample_prior_predictive(100, var_names=["a"]) - idata.add_groups({"posterior": idata.prior}) + idata.update({"posterior": idata.prior}) _, _, vectorized_no_intermediate = vectorize_over_posterior( outputs=[b, c, d], posterior=idata.posterior, diff --git a/tests/sampling/test_mcmc.py b/tests/sampling/test_mcmc.py index 090b76130b..5d2374092f 100644 --- a/tests/sampling/test_mcmc.py +++ b/tests/sampling/test_mcmc.py @@ -24,9 +24,9 @@ import pytest import scipy.special -from arviz import InferenceData from pytensor import shared from pytensor.compile.ops import as_op +from xarray import DataTree import pymc as pm @@ -377,7 +377,7 @@ def test_sample_return_lengths(self): assert mtrace_pst.report.n_tune == 50 assert mtrace_pst.report.n_draws == 100 - # InferenceData with warmup + # DataTree with warmup idata_w = pm.sampling.mcmc._sample_return( run=None, traces=traces, @@ -390,13 +390,13 @@ def test_sample_return_lengths(self): idata_kwargs={}, model=model, ) - assert isinstance(idata_w, InferenceData) + assert isinstance(idata_w, DataTree) assert hasattr(idata_w, "warmup_posterior") assert idata_w.warmup_posterior.sizes["draw"] == 50 assert idata_w.posterior.sizes["draw"] == 100 assert idata_w.posterior.sizes["chain"] == 3 - # InferenceData without warmup + # DataTree without warmup idata = pm.sampling.mcmc._sample_return( run=None, traces=traces, @@ -409,7 +409,7 @@ def test_sample_return_lengths(self): idata_kwargs={}, model=model, ) - assert isinstance(idata, InferenceData) + assert isinstance(idata, DataTree) assert not hasattr(idata, "warmup_posterior") assert idata.posterior.sizes["draw"] == 100 assert idata.posterior.sizes["chain"] == 3 @@ -458,7 +458,7 @@ def test_keep_warning_stat_setting(self, keep_warning_stat): if keep_warning_stat: assert "warning" in idata.warmup_sample_stats assert "warning" in idata.sample_stats - # And end up in the InferenceData + # And end up in the DataTree assert "warning" in idata.sample_stats # NOTE: The stats are squeezed by default but this does not always work. # This tests flattens so we don't have to be exact in accessing (non-)squeezed items. diff --git a/tests/smc/test_smc.py b/tests/smc/test_smc.py index 493c0c8daa..8816e3a0bf 100644 --- a/tests/smc/test_smc.py +++ b/tests/smc/test_smc.py @@ -20,7 +20,7 @@ import pytest import scipy.stats as st -from arviz.data.inference_data import InferenceData +from xarray import DataTree import pymc as pm @@ -236,7 +236,7 @@ def test_return_datatype(self, chains): progressbar=not (chains > 1 and _IS_WINDOWS), ) - assert isinstance(idata, InferenceData) + assert isinstance(idata, DataTree) assert "sample_stats" in idata assert idata.posterior.sizes["chain"] == chains assert idata.posterior.sizes["draw"] == draws @@ -288,7 +288,7 @@ def test_normal_model(self): idata = pm.sample_smc(draws=2000, kernel=pm.smc.MH, progressbar=not _IS_WINDOWS) assert_random_state_equal(initial_rng_state, np.random.get_state()) - post = idata.posterior.stack(sample=("chain", "draw")) + post = idata.posterior.to_dataset().stack(sample=("chain", "draw")) assert np.abs(post["mu"].mean() - 10) < 0.1 assert np.abs(post["sigma"].mean() - 0.5) < 0.05 diff --git a/tests/stats/test_convergence.py b/tests/stats/test_convergence.py index 52d5c5048c..3a25f9ff86 100644 --- a/tests/stats/test_convergence.py +++ b/tests/stats/test_convergence.py @@ -30,8 +30,10 @@ ) def test_warn_divergences(diverging, expected_phrase): idata = arviz.from_dict( - sample_stats={ - "diverging": np.array([diverging, [0, 0, 0, 0]]).astype(bool), + { + "sample_stats": { + "diverging": np.array([diverging, [0, 0, 0, 0]]).astype(bool), + } } ) warns = convergence.warn_divergences(idata) @@ -41,8 +43,10 @@ def test_warn_divergences(diverging, expected_phrase): def test_warn_treedepth(): idata = arviz.from_dict( - sample_stats={ - "reached_max_treedepth": np.array([[0, 0, 0], [0, 1, 0]]).astype(bool), + { + "sample_stats": { + "reached_max_treedepth": np.array([[0, 0, 0], [0, 1, 0]]).astype(bool), + } } ) warns = convergence.warn_treedepth(idata) @@ -56,8 +60,10 @@ def test_warn_treedepth_multiple_samplers(): max_treedepth[0, 0, 0] = True max_treedepth[2, 1, 1] = True idata = arviz.from_dict( - sample_stats={ - "reached_max_treedepth": max_treedepth, + { + "sample_stats": { + "reached_max_treedepth": max_treedepth, + } } ) warns = convergence.warn_treedepth(idata) diff --git a/tests/stats/test_log_density.py b/tests/stats/test_log_density.py index 7b2eb3774e..4bfee21e3b 100644 --- a/tests/stats/test_log_density.py +++ b/tests/stats/test_log_density.py @@ -17,7 +17,7 @@ import pytest import scipy.stats as st -from arviz import InferenceData, dict_to_dataset, from_dict +from arviz import from_dict from pymc.distributions import Dirichlet, Normal from pymc.distributions.transforms import log @@ -35,7 +35,7 @@ def test_basic(self, transform): x_value_var = m.rvs_to_values[x] y = Normal("y", x, observed=[0, 1, 2], dims=("test_dim",)) - idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)})) + idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}}) res = compute_log_likelihood(idata) # Check we didn't erase the original mappings @@ -61,7 +61,7 @@ def test_multivariate(self): "y", a=p.exp(), observed=y_draws, dims=("test_event_dim", "test_support_dim") ) - idata = InferenceData(posterior=dict_to_dataset({"p": p_draws})) + idata = from_dict({"posterior": {"p": p_draws}}) res = compute_log_likelihood(idata) assert res.log_likelihood.sizes == {"chain": 4, "draw": 25, "test_event_dim": 10} @@ -77,7 +77,7 @@ def test_var_names(self): y1 = Normal("y1", x, observed=[0, 1, 2]) y2 = Normal("y2", x, observed=[3, 4]) - idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)})) + idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}}) res_y1 = compute_log_likelihood( idata, var_names=["y1"], extend_inferencedata=False, model=m, progressbar=False @@ -116,7 +116,7 @@ def test_invalid_var_names(self): x = Normal("x") y = Normal("y", x, observed=[0, 1, 2]) - idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)})) + idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}}) with pytest.raises(ValueError, match="var_names must refer to observed_RVs"): compute_log_likelihood(idata, var_names=["x"]) @@ -126,7 +126,7 @@ def test_dims_without_coords(self): x = Normal("x") y = Normal("y", x, observed=[0, 0, 0], shape=(3,), dims="obs") - trace = from_dict({"x": [[0, 1]]}) + trace = from_dict({"posterior": {"x": np.array([[0, 1]])}}) llike = compute_log_likelihood(trace) assert len(llike.log_likelihood["obs"]) == 3 @@ -143,7 +143,7 @@ def test_basic_log_prior(self, transform): x_value_var = m.rvs_to_values[x] Normal("y", x, observed=[0, 1, 2]) - idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)})) + idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}}) res = compute_log_prior(idata) # Check we didn't erase the original mappings @@ -164,7 +164,7 @@ def test_deterministic_log_prior(self): Deterministic("d", 2 * x) Normal("y", x, observed=[0, 1, 2]) - idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)})) + idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}}) res = compute_log_prior(idata) assert res is idata @@ -183,7 +183,7 @@ def test_compilation_kwargs(self): Deterministic("d", 2 * x) Normal("y", x, observed=[0, 1, 2]) - idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)})) + idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}}) with ( # apply_function_over_dataset fails with patched `compile_pymc` patch("pymc.stats.log_density.apply_function_over_dataset"),