diff --git a/tests/backends/test_arviz.py b/tests/backends/test_arviz.py
index 85c1d9915c..fe1906ec9c 100644
--- a/tests/backends/test_arviz.py
+++ b/tests/backends/test_arviz.py
@@ -19,15 +19,14 @@
 import pytest
 import xarray
 
-from arviz import InferenceData
-from arviz.tests.helpers import check_multiple_attrs
+from arviz_base.testing import check_multiple_attrs
 from numpy import ma
 from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1
 
 import pymc as pm
 
 from pymc.backends.arviz import (
-    InferenceDataConverter,
+    DataTreeConverter,
     dataset_to_point_list,
     predictions_to_inference_data,
     to_inference_data,
@@ -110,7 +109,7 @@ def get_inference_data(self, data, eight_schools_params):
 
     def get_predictions_inference_data(
         self, data, eight_schools_params, inplace
-    ) -> tuple[InferenceData, dict[str, np.ndarray]]:
+    ) -> tuple[xarray.DataTree, dict[str, np.ndarray]]:
         with data.model:
             prior = pm.sample_prior_predictive(return_inferencedata=False)
             posterior_predictive = pm.sample_posterior_predictive(
@@ -123,17 +122,17 @@ def get_predictions_inference_data(
                 coords={"school": np.arange(eight_schools_params["J"])},
                 dims={"theta": ["school"], "eta": ["school"]},
             )
-            assert isinstance(idata, InferenceData)
+            assert isinstance(idata, xarray.DataTree)
             extended = predictions_to_inference_data(
                 posterior_predictive, idata_orig=idata, inplace=inplace
             )
-            assert isinstance(extended, InferenceData)
+            assert isinstance(extended, xarray.DataTree)
             assert (id(idata) == id(extended)) == inplace
         return (extended, posterior_predictive)
 
     def make_predictions_inference_data(
         self, data, eight_schools_params
-    ) -> tuple[InferenceData, dict[str, np.ndarray]]:
+    ) -> tuple[xarray.DataTree, dict[str, np.ndarray]]:
         with data.model:
             posterior_predictive = pm.sample_posterior_predictive(
                 data.obj, return_inferencedata=False
@@ -144,7 +143,7 @@ def make_predictions_inference_data(
                 coords={"school": np.arange(eight_schools_params["J"])},
                 dims={"theta": ["school"], "eta": ["school"]},
             )
-            assert isinstance(idata, InferenceData)
+            assert isinstance(idata, xarray.DataTree)
         return idata, posterior_predictive
 
     def test_to_idata(self, data, eight_schools_params, chains, draws):
@@ -166,7 +165,7 @@ def test_to_idata(self, data, eight_schools_params, chains, draws):
         assert inference_data.log_likelihood["obs"].shape == (chains, draws, *obs.shape)
 
     def test_predictions_to_idata(self, data, eight_schools_params):
-        "Test that we can add predictions to a previously-existing InferenceData."
+        "Test that we can add predictions to a previously-existing xarray.DataTree."
         test_dict = {
             "posterior": ["mu", "tau", "eta", "theta"],
             "sample_stats": ["diverging", "lp"],
@@ -236,7 +235,7 @@ def test_posterior_predictive_thinned(self, data):
                 warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning)
                 idata = pm.sample(tune=5, draws=draws, chains=2, return_inferencedata=True)
             thinned_idata = idata.sel(draw=slice(None, None, thin_by))
-            idata.extend(pm.sample_posterior_predictive(thinned_idata))
+            idata.update(pm.sample_posterior_predictive(thinned_idata))
         test_dict = {
             "posterior": ["mu", "tau", "eta", "theta"],
             "sample_stats": ["diverging", "lp", "~log_likelihood"],
@@ -639,7 +638,12 @@ def test_constant_data_coords_issue_5046(self):
             assert len(data[k].shape) == len(dims[k])
 
         ds = pm.backends.arviz.dict_to_dataset(
-            data=data, library=pm, coords=coords, dims=dims, default_dims=[], index_origin=0
+            data=data,
+            inference_library=pm,
+            coords=coords,
+            dims=dims,
+            sample_dims=[],
+            index_origin=0,
         )
         for dname, cvals in coords.items():
             np.testing.assert_array_equal(ds[dname].values, cvals)
@@ -661,14 +665,14 @@ def test_issue_5043_autoconvert_coord_values(self):
             )
             # The converter must convert coord values them to numpy arrays
             # because tuples as coordinate values causes problems with xarray.
-            converter = InferenceDataConverter(trace=mtrace)
+            converter = DataTreeConverter(trace=mtrace)
             assert isinstance(converter.coords["city"], np.ndarray)
             converter.to_inference_data()
 
             # We're not automatically converting things other than tuple,
-            # so advanced use cases remain supported at the InferenceData level.
+            # so advanced use cases remain supported at the DataTree level.
             # They just can't be used in the model construction already.
-            converter = InferenceDataConverter(
+            converter = DataTreeConverter(
                 trace=mtrace,
                 coords={
                     "city": pd.MultiIndex.from_tuples(
@@ -862,11 +866,13 @@ def test_incompatible_coordinate_lengths():
                 "Incompatible coordinate length of 3 for dimension 'a' of variable 'y'"
             ),
         ):
-            prior = pm.sample_prior_predictive(draws=1).prior.squeeze(("chain", "draw"))
+            prior = (
+                pm.sample_prior_predictive(draws=1).prior.to_dataset().squeeze(("chain", "draw"))
+            )
         assert prior.x.dims == prior.y.dims == ("a",)
         assert prior.x.shape == prior.y.shape == (3,)
         assert np.isnan(prior.y.values[-1])
-        assert list(prior.coords["a"]) == [0, 1, 2]
+        assert list(prior.coords["a"]) == [-1, -2, -3]
 
         pm.backends.arviz.RAISE_ON_INCOMPATIBLE_COORD_LENGTHS = True
         with pytest.raises(ValueError):
diff --git a/tests/backends/test_zarr.py b/tests/backends/test_zarr.py
index af9c9e0a06..ce1cacfe9e 100644
--- a/tests/backends/test_zarr.py
+++ b/tests/backends/test_zarr.py
@@ -20,8 +20,6 @@
 import xarray as xr
 import zarr
 
-from arviz import InferenceData
-
 import pymc as pm
 
 from pymc.backends.zarr import ZarrTrace
@@ -436,7 +434,7 @@ def test_sample(
         assert isinstance(out_trace, ZarrTrace)
         assert out_trace.root.store is trace.root.store
     else:
-        assert isinstance(out_trace, InferenceData)
+        assert isinstance(out_trace, xr.DataTree)
 
     expected_groups = {"posterior", "constant_data", "observed_data", "sample_stats"}
     if include_transformed:
diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py
index 84ad396b1c..d131f82e98 100644
--- a/tests/gp/test_hsgp_approx.py
+++ b/tests/gp/test_hsgp_approx.py
@@ -215,8 +215,8 @@ def test_prior(self, model, cov_func, X1, parametrization, rng):
 
             idata = pm.sample_prior_predictive(draws=1000, random_seed=rng)
 
-        samples1 = az.extract(idata.prior["f1"])["f1"].values.T
-        samples2 = az.extract(idata.prior["f2"])["f2"].values.T
+        samples1 = az.extract(idata.prior["f1"]).values.T
+        samples2 = az.extract(idata.prior["f2"]).values.T
 
         h0, mmd, critical_value, reject = two_sample_test(
             samples1, samples2, n_sims=500, alpha=0.01
@@ -242,8 +242,8 @@ def test_conditional(self, model, cov_func, X1, parametrization):
 
             idata = pm.sample_prior_predictive(draws=1000)
 
-        samples1 = az.extract(idata.prior["f"])["f"].values.T
-        samples2 = az.extract(idata.prior["fc"])["fc"].values.T
+        samples1 = az.extract(idata.prior["f"]).values.T
+        samples2 = az.extract(idata.prior["fc"]).values.T
 
         h0, mmd, critical_value, reject = two_sample_test(
             samples1, samples2, n_sims=500, alpha=0.01
@@ -302,8 +302,8 @@ def test_prior(self, model, cov_func, eta, X1, rng):
 
             idata = pm.sample_prior_predictive(draws=1000, random_seed=rng)
 
-        samples1 = az.extract(idata.prior["f1"])["f1"].values.T
-        samples2 = az.extract(idata.prior["f2"])["f2"].values.T
+        samples1 = az.extract(idata.prior["f1"]).values.T
+        samples2 = az.extract(idata.prior["f2"]).values.T
 
         h0, mmd, critical_value, reject = two_sample_test(
             samples1, samples2, n_sims=500, alpha=0.01
@@ -323,8 +323,8 @@ def test_conditional_periodic(self, model, cov_func, X1):
 
             idata = pm.sample_prior_predictive(draws=1000)
 
-        samples1 = az.extract(idata.prior["f"])["f"].values.T
-        samples2 = az.extract(idata.prior["fc"])["fc"].values.T
+        samples1 = az.extract(idata.prior["f"]).values.T
+        samples2 = az.extract(idata.prior["fc"]).values.T
 
         h0, mmd, critical_value, reject = two_sample_test(
             samples1, samples2, n_sims=500, alpha=0.01
diff --git a/tests/model/test_core.py b/tests/model/test_core.py
index 5e5b7ecb7d..5208b552b2 100644
--- a/tests/model/test_core.py
+++ b/tests/model/test_core.py
@@ -227,7 +227,7 @@ def test_nested_model_to_netcdf(self, tmp_path):
         with pm.Model("scope") as model:
             b = pm.Normal("var")
             trace = pm.sample(100, tune=0)
-        az.to_netcdf(trace, tmp_path / "trace.nc")
+        trace.to_netcdf(tmp_path / "trace.nc")
         trace1 = az.from_netcdf(tmp_path / "trace.nc")
         assert "scope::var" in trace1.posterior
 
@@ -1430,8 +1430,10 @@ def test_interval_missing_observations(self):
             np.testing.assert_array_equal(trace["theta2"][0][~obs2.mask], obs1[~obs2.mask])
 
             pp_idata = pm.sample_posterior_predictive(trace, random_seed=rng)
-            pp_trace = pp_idata.posterior_predictive.stack(sample=["chain", "draw"]).transpose(
-                "sample", ...
+            pp_trace = (
+                pp_idata.posterior_predictive.to_dataset()
+                .stack(sample=["chain", "draw"])
+                .transpose("sample", ...)
             )
             assert set(pp_trace.keys()) == {
                 "theta1",
diff --git a/tests/model/transform/test_conditioning.py b/tests/model/transform/test_conditioning.py
index fa9ce71246..6369a68047 100644
--- a/tests/model/transform/test_conditioning.py
+++ b/tests/model/transform/test_conditioning.py
@@ -159,9 +159,11 @@ def test_do_posterior_predictive():
     # Dummy posterior
     idata_m = az.from_dict(
         {
-            "x": np.full((2, 500), 25),
-            "y": np.full((2, 500), np.nan),
-            "z": np.full((2, 500), np.nan),
+            "posterior": {
+                "x": np.full((2, 500), 25),
+                "y": np.full((2, 500), np.nan),
+                "z": np.full((2, 500), np.nan),
+            }
         }
     )
 
@@ -293,7 +295,9 @@ def test_do_sample_posterior_predictive(make_interventions_shared):
         b = pm.Deterministic("b", a * 2)
         c = pm.Normal("c", b / 2)
 
-    idata = az.from_dict({"a": [[1.0]], "b": [[2.0]], "c": [[1.0]]})
+    idata = az.from_dict(
+        {"posterior": {"a": np.array([[1.0]]), "b": np.array([[2.0]]), "c": np.array([[1.0]])}}
+    )
 
     with do(model, {a: 1000}, make_interventions_shared=make_interventions_shared):
         pp = sample_posterior_predictive(idata, var_names=["c"], predictions=True).predictions
diff --git a/tests/sampling/test_forward.py b/tests/sampling/test_forward.py
index 784e16339b..15d9397307 100644
--- a/tests/sampling/test_forward.py
+++ b/tests/sampling/test_forward.py
@@ -22,9 +22,8 @@
 import pytest
 import xarray as xr
 
-from arviz import InferenceData
-from arviz import from_dict as az_from_dict
-from arviz.tests.helpers import check_multiple_attrs
+from arviz_base import from_dict as az_from_dict
+from arviz_base.testing import check_multiple_attrs
 from pytensor import Mode, shared
 from pytensor.compile import SharedVariable
 from pytensor.graph import graph_inputs
@@ -441,7 +440,7 @@ def test_length_coords_volatile(self):
 
         # Same coord length -- `x` is not volatile
         trace_same_len = az_from_dict(
-            posterior={"x": [[[np.pi] * 3]]},
+            {"posterior": {"x": np.array([[[np.pi] * 3]])}},
             coords={"trial": range(3)},
             dims={"x": ["trial"]},
         )
@@ -449,19 +448,18 @@ def test_length_coords_volatile(self):
             pp_same_len = pm.sample_posterior_predictive(
                 trace_same_len, var_names=["y"]
             ).posterior_predictive
-        assert pp_same_len["y"] == np.pi
+        assert pp_same_len["y"].values.item() == np.pi
 
         # Coord length changed -- `x` is volatile
         trace_diff_len = az_from_dict(
-            posterior={"x": [[[np.pi] * 2]]},
+            {"posterior": {"x": np.array([[[np.pi] * 2]])}},
             coords={"trial": range(2)},
-            dims={"x": ["trial"]},
         )
         with model:
             pp_diff_len = pm.sample_posterior_predictive(
                 trace_diff_len, var_names=["y"]
             ).posterior_predictive
-        assert pp_diff_len["y"] != np.pi
+        assert pp_diff_len["y"].values.item() != np.pi
 
         # Changing the dim length on the model itself
         # -- `x` is volatile because trace has same len as original model
@@ -470,7 +468,7 @@ def test_length_coords_volatile(self):
             pp_diff_len_model_set = pm.sample_posterior_predictive(
                 trace_same_len, var_names=["y"]
             ).posterior_predictive
-        assert pp_diff_len_model_set["y"] != np.pi
+        assert pp_diff_len_model_set["y"].values.item() != np.pi
 
 
 class TestSamplePPC:
@@ -497,7 +495,7 @@ def test_normal_scalar(self):
             assert len(ppc) == 0
 
             # test empty ppc with extend_inferencedata
-            assert isinstance(trace, InferenceData)
+            assert isinstance(trace, xr.DataTree)
             ppc = pm.sample_posterior_predictive(trace, var_names=[], extend_inferencedata=True)
             assert ppc is trace
 
@@ -534,12 +532,12 @@ def test_normal_scalar_idata(self):
                     discard_tuned_samples=False,
                 )
 
-        assert not isinstance(trace, InferenceData)
+        assert not isinstance(trace, xr.DataTree)
 
         with model:
             # test keep_size parameter and idata input
             idata = pm.to_inference_data(trace)
-            assert isinstance(idata, InferenceData)
+            assert isinstance(idata, xr.DataTree)
 
             ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False)
             assert ppc["a"].shape == (nchains, ndraws)
@@ -587,12 +585,12 @@ def test_normal_vector_idata(self):
             a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
             trace = pm.sample(return_inferencedata=False)
 
-        assert not isinstance(trace, InferenceData)
+        assert not isinstance(trace, xr.DataTree)
 
         with model:
             # test keep_size parameter with inference data as input...
             idata = pm.to_inference_data(trace)
-            assert isinstance(idata, InferenceData)
+            assert isinstance(idata, xr.DataTree)
 
             ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False)
             assert ppc["a"].shape == (trace.nchains, len(trace), 2)
@@ -783,7 +781,7 @@ def test_potentials_warning(self):
             p = pm.Potential("p", a + 1)
             obs = pm.Normal("obs", a, 1, observed=5)
 
-        trace = az_from_dict({"a": np.random.rand(5)})
+        trace = az_from_dict({"posterior": {"a": np.random.rand(1, 5)}})
         with m:
             with pytest.warns(UserWarning, match=warning_msg):
                 pm.sample_posterior_predictive(trace)
@@ -886,7 +884,9 @@ def test_logging_sampled_basic_rvs_posterior(self, caplog):
             y = pm.Normal("y", x_det)
             z = pm.Normal("z", y, observed=0)
 
-        idata = az_from_dict(posterior={"x": np.zeros(5), "x_det": np.ones(5), "y": np.ones(5)})
+        idata = az_from_dict(
+            {"posterior": {"x": np.zeros((1, 5)), "x_det": np.ones((1, 5)), "y": np.ones((1, 5))}}
+        )
         with m:
             pm.sample_posterior_predictive(idata)
         assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [z]")]
@@ -907,21 +907,21 @@ def test_logging_sampled_basic_rvs_posterior(self, caplog):
 
         # Missing deterministic `x_det` does not show in the log, even if it is being
         # recomputed, only `y` RV shows
-        idata = az_from_dict(posterior={"x": np.zeros(5)})
+        idata = az_from_dict({"posterior": {"x": np.zeros((1, 5))}})
         with m:
             pm.sample_posterior_predictive(idata)
         assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [y, z]")]
         caplog.clear()
 
         # Missing deterministic `x_det` does not cause recomputation of downstream `y` RV
-        idata = az_from_dict(posterior={"x": np.zeros(5), "y": np.ones(5)})
+        idata = az_from_dict({"posterior": {"x": np.zeros((1, 5)), "y": np.ones((1, 5))}})
         with m:
             pm.sample_posterior_predictive(idata)
         assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [z]")]
         caplog.clear()
 
         # Missing `x` causes sampling of downstream `y` RV, even if it is present in trace
-        idata = az_from_dict(posterior={"y": np.ones(5)})
+        idata = az_from_dict({"posterior": {"y": np.ones((1, 5))}})
         with m:
             pm.sample_posterior_predictive(idata)
         assert caplog.record_tuples == [
@@ -938,7 +938,9 @@ def test_logging_sampled_basic_rvs_posterior_deterministic(self, caplog):
 
         # Explicit resampling a deterministic will lead to resampling of downstream RV `y`
         # This behavior could change in the future as the posterior of `y` is still valid
-        idata = az_from_dict(posterior={"x": np.zeros(5), "x_det": np.ones(5), "y": np.ones(5)})
+        idata = az_from_dict(
+            {"posterior": {"x": np.zeros((1, 5)), "x_det": np.ones((1, 5)), "y": np.ones((1, 5))}}
+        )
         with m:
             pm.sample_posterior_predictive(idata, var_names=["x_det", "z"])
         assert caplog.record_tuples == [("pymc.sampling.forward", logging.INFO, "Sampling: [y, z]")]
@@ -979,7 +981,7 @@ def mock_multitrace(self):
             )
         return trace
 
-    @pytest.fixture(scope="class", params=["MultiTrace", "InferenceData", "Dataset"])
+    @pytest.fixture(scope="class", params=["MultiTrace", "DataTree", "Dataset"])
     def mock_sample_results(self, request, mock_multitrace):
         kind = request.param
         trace = mock_multitrace
@@ -1012,8 +1014,8 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]")
             ]
             caplog.clear()
-        elif kind == "InferenceData":
-            # InferenceData has all MCMC posterior samples and the values for both coordinates and
+        elif kind == "DataTree":
+            # DataTree has all MCMC posterior samples and the values for both coordinates and
             # data containers. This enables it to see that no data has changed and it should only
             # resample the observed variable
             assert caplog.record_tuples == [
@@ -1031,7 +1033,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
 
         original_offsets = model["offsets"].get_value()
         with model:
-            # Changing the Data values. This will only be picked up by InferenceData
+            # Changing the Data values. This will only be picked up by DataTree
             pm.set_data({"offsets": original_offsets + 1})
             pm.sample_posterior_predictive(samples)
         if kind == "MultiTrace":
@@ -1039,7 +1041,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]")
             ]
             caplog.clear()
-        elif kind == "InferenceData":
+        elif kind == "DataTree":
             assert caplog.record_tuples == [
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [b, y]")
             ]
@@ -1051,7 +1053,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
             caplog.clear()
 
         with model:
-            # Changing the mutable coordinates. This will be picked up by InferenceData and Dataset
+            # Changing the mutable coordinates. This will be picked up by DataTree and Dataset
             model.set_dim("name", new_length=4, coord_values=["D", "E", "F", "G"])
             pm.set_data({"offsets": original_offsets, "y_obs": np.zeros((10, 4))})
             pm.sample_posterior_predictive(samples)
@@ -1060,7 +1062,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]")
             ]
             caplog.clear()
-        elif kind == "InferenceData":
+        elif kind == "DataTree":
             assert caplog.record_tuples == [
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [a, sigma, y]")
             ]
@@ -1082,7 +1084,7 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]")
             ]
             caplog.clear()
-        elif kind == "InferenceData":
+        elif kind == "DataTree":
             assert caplog.record_tuples == [
                 ("pymc.sampling.forward", logging.INFO, "Sampling: [a, b, sigma, y]")
             ]
@@ -1106,7 +1108,7 @@ def test_observed_data_needed_in_pp(self):
 
             prior = pm.sample_prior_predictive(draws=25).prior
 
-        fake_idata = InferenceData(posterior=prior)
+        fake_idata = az_from_dict({"posterior": prior})
 
         new_coords = {"trial": range(2), "feature": range(3)}
         new_x_data = np.random.normal(size=(2, 3))
@@ -1130,7 +1132,7 @@ def test_observed_data_needed_in_pp(self):
 
             prior = pm.sample_prior_predictive(draws=25).prior
 
-        fake_idata = InferenceData(posterior=prior)
+        fake_idata = az_from_dict({"posterior": prior})
 
         with m:
             pm.set_data({"x_data": new_x_data}, coords=new_coords)
@@ -1407,7 +1409,7 @@ def test_pytensor_function_kwargs(self):
             y = pm.Deterministic("y", x + sharedvar)
 
             pp = pm.sample_posterior_predictive(
-                trace=az_from_dict({"x": np.arange(5)}),
+                trace=az_from_dict({"posterior": {"x": np.arange(5).reshape(1, 5)}}),
                 var_names=["y"],
                 return_inferencedata=False,
                 compile_kwargs={
@@ -1421,7 +1423,9 @@ def test_pytensor_function_kwargs(self):
     def test_sample_dims(self, point_list_arg_bug_fixture):
         pmodel, trace = point_list_arg_bug_fixture
         with pmodel:
-            post = pm.to_inference_data(trace).posterior.stack(sample=["chain", "draw"])
+            post = (
+                pm.to_inference_data(trace).posterior.to_dataset().stack(sample=["chain", "draw"])
+            )
             pp = pm.sample_posterior_predictive(post, var_names=["d"], sample_dims=["sample"])
             assert "sample" in pp.posterior_predictive
             assert len(pp.posterior_predictive["sample"]) == len(post["sample"])
@@ -1846,7 +1850,7 @@ def model_to_vectorize(has_nested_random_variables):
 
     with model:
         idata = pm.sample_prior_predictive(100)
-        idata.add_groups({"posterior": idata.prior})
+        idata.update({"posterior": idata.prior})
     return freeze_dims_and_data(model), idata
 
 
@@ -1939,7 +1943,7 @@ def test_vectorize_over_posterior_matches_sample():
                 )
             }
         )
-    idata = InferenceData(posterior=posterior)
+    idata = az_from_dict({"posterior": posterior})
     with model:
         pp = pm.sample_posterior_predictive(idata, var_names=["obs", "det"], random_seed=1234)
         vectorized = vectorize_over_posterior(
@@ -1967,7 +1971,7 @@ def test_vectorize_over_posterior_with_intermediate_rvs():
         c = b + 1
         d = pm.Normal.dist(c)
         idata = pm.sample_prior_predictive(100, var_names=["a"])
-        idata.add_groups({"posterior": idata.prior})
+        idata.update({"posterior": idata.prior})
     _, _, vectorized_no_intermediate = vectorize_over_posterior(
         outputs=[b, c, d],
         posterior=idata.posterior,
diff --git a/tests/sampling/test_mcmc.py b/tests/sampling/test_mcmc.py
index 090b76130b..5d2374092f 100644
--- a/tests/sampling/test_mcmc.py
+++ b/tests/sampling/test_mcmc.py
@@ -24,9 +24,9 @@
 import pytest
 import scipy.special
 
-from arviz import InferenceData
 from pytensor import shared
 from pytensor.compile.ops import as_op
+from xarray import DataTree
 
 import pymc as pm
 
@@ -377,7 +377,7 @@ def test_sample_return_lengths(self):
         assert mtrace_pst.report.n_tune == 50
         assert mtrace_pst.report.n_draws == 100
 
-        # InferenceData with warmup
+        # DataTree with warmup
         idata_w = pm.sampling.mcmc._sample_return(
             run=None,
             traces=traces,
@@ -390,13 +390,13 @@ def test_sample_return_lengths(self):
             idata_kwargs={},
             model=model,
         )
-        assert isinstance(idata_w, InferenceData)
+        assert isinstance(idata_w, DataTree)
         assert hasattr(idata_w, "warmup_posterior")
         assert idata_w.warmup_posterior.sizes["draw"] == 50
         assert idata_w.posterior.sizes["draw"] == 100
         assert idata_w.posterior.sizes["chain"] == 3
 
-        # InferenceData without warmup
+        # DataTree without warmup
         idata = pm.sampling.mcmc._sample_return(
             run=None,
             traces=traces,
@@ -409,7 +409,7 @@ def test_sample_return_lengths(self):
             idata_kwargs={},
             model=model,
         )
-        assert isinstance(idata, InferenceData)
+        assert isinstance(idata, DataTree)
         assert not hasattr(idata, "warmup_posterior")
         assert idata.posterior.sizes["draw"] == 100
         assert idata.posterior.sizes["chain"] == 3
@@ -458,7 +458,7 @@ def test_keep_warning_stat_setting(self, keep_warning_stat):
         if keep_warning_stat:
             assert "warning" in idata.warmup_sample_stats
             assert "warning" in idata.sample_stats
-            # And end up in the InferenceData
+            # And end up in the DataTree
             assert "warning" in idata.sample_stats
             # NOTE: The stats are squeezed by default but this does not always work.
             #       This tests flattens so we don't have to be exact in accessing (non-)squeezed items.
diff --git a/tests/smc/test_smc.py b/tests/smc/test_smc.py
index 493c0c8daa..8816e3a0bf 100644
--- a/tests/smc/test_smc.py
+++ b/tests/smc/test_smc.py
@@ -20,7 +20,7 @@
 import pytest
 import scipy.stats as st
 
-from arviz.data.inference_data import InferenceData
+from xarray import DataTree
 
 import pymc as pm
 
@@ -236,7 +236,7 @@ def test_return_datatype(self, chains):
                     progressbar=not (chains > 1 and _IS_WINDOWS),
                 )
 
-        assert isinstance(idata, InferenceData)
+        assert isinstance(idata, DataTree)
         assert "sample_stats" in idata
         assert idata.posterior.sizes["chain"] == chains
         assert idata.posterior.sizes["draw"] == draws
@@ -288,7 +288,7 @@ def test_normal_model(self):
             idata = pm.sample_smc(draws=2000, kernel=pm.smc.MH, progressbar=not _IS_WINDOWS)
         assert_random_state_equal(initial_rng_state, np.random.get_state())
 
-        post = idata.posterior.stack(sample=("chain", "draw"))
+        post = idata.posterior.to_dataset().stack(sample=("chain", "draw"))
         assert np.abs(post["mu"].mean() - 10) < 0.1
         assert np.abs(post["sigma"].mean() - 0.5) < 0.05
 
diff --git a/tests/stats/test_convergence.py b/tests/stats/test_convergence.py
index 52d5c5048c..3a25f9ff86 100644
--- a/tests/stats/test_convergence.py
+++ b/tests/stats/test_convergence.py
@@ -30,8 +30,10 @@
 )
 def test_warn_divergences(diverging, expected_phrase):
     idata = arviz.from_dict(
-        sample_stats={
-            "diverging": np.array([diverging, [0, 0, 0, 0]]).astype(bool),
+        {
+            "sample_stats": {
+                "diverging": np.array([diverging, [0, 0, 0, 0]]).astype(bool),
+            }
         }
     )
     warns = convergence.warn_divergences(idata)
@@ -41,8 +43,10 @@ def test_warn_divergences(diverging, expected_phrase):
 
 def test_warn_treedepth():
     idata = arviz.from_dict(
-        sample_stats={
-            "reached_max_treedepth": np.array([[0, 0, 0], [0, 1, 0]]).astype(bool),
+        {
+            "sample_stats": {
+                "reached_max_treedepth": np.array([[0, 0, 0], [0, 1, 0]]).astype(bool),
+            }
         }
     )
     warns = convergence.warn_treedepth(idata)
@@ -56,8 +60,10 @@ def test_warn_treedepth_multiple_samplers():
     max_treedepth[0, 0, 0] = True
     max_treedepth[2, 1, 1] = True
     idata = arviz.from_dict(
-        sample_stats={
-            "reached_max_treedepth": max_treedepth,
+        {
+            "sample_stats": {
+                "reached_max_treedepth": max_treedepth,
+            }
         }
     )
     warns = convergence.warn_treedepth(idata)
diff --git a/tests/stats/test_log_density.py b/tests/stats/test_log_density.py
index 7b2eb3774e..4bfee21e3b 100644
--- a/tests/stats/test_log_density.py
+++ b/tests/stats/test_log_density.py
@@ -17,7 +17,7 @@
 import pytest
 import scipy.stats as st
 
-from arviz import InferenceData, dict_to_dataset, from_dict
+from arviz import from_dict
 
 from pymc.distributions import Dirichlet, Normal
 from pymc.distributions.transforms import log
@@ -35,7 +35,7 @@ def test_basic(self, transform):
             x_value_var = m.rvs_to_values[x]
             y = Normal("y", x, observed=[0, 1, 2], dims=("test_dim",))
 
-            idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)}))
+            idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}})
             res = compute_log_likelihood(idata)
 
         # Check we didn't erase the original mappings
@@ -61,7 +61,7 @@ def test_multivariate(self):
                 "y", a=p.exp(), observed=y_draws, dims=("test_event_dim", "test_support_dim")
             )
 
-            idata = InferenceData(posterior=dict_to_dataset({"p": p_draws}))
+            idata = from_dict({"posterior": {"p": p_draws}})
             res = compute_log_likelihood(idata)
 
         assert res.log_likelihood.sizes == {"chain": 4, "draw": 25, "test_event_dim": 10}
@@ -77,7 +77,7 @@ def test_var_names(self):
             y1 = Normal("y1", x, observed=[0, 1, 2])
             y2 = Normal("y2", x, observed=[3, 4])
 
-        idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)}))
+        idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}})
 
         res_y1 = compute_log_likelihood(
             idata, var_names=["y1"], extend_inferencedata=False, model=m, progressbar=False
@@ -116,7 +116,7 @@ def test_invalid_var_names(self):
             x = Normal("x")
             y = Normal("y", x, observed=[0, 1, 2])
 
-            idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)}))
+            idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}})
             with pytest.raises(ValueError, match="var_names must refer to observed_RVs"):
                 compute_log_likelihood(idata, var_names=["x"])
 
@@ -126,7 +126,7 @@ def test_dims_without_coords(self):
             x = Normal("x")
             y = Normal("y", x, observed=[0, 0, 0], shape=(3,), dims="obs")
 
-            trace = from_dict({"x": [[0, 1]]})
+            trace = from_dict({"posterior": {"x": np.array([[0, 1]])}})
             llike = compute_log_likelihood(trace)
 
         assert len(llike.log_likelihood["obs"]) == 3
@@ -143,7 +143,7 @@ def test_basic_log_prior(self, transform):
             x_value_var = m.rvs_to_values[x]
             Normal("y", x, observed=[0, 1, 2])
 
-            idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)}))
+            idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}})
             res = compute_log_prior(idata)
 
         # Check we didn't erase the original mappings
@@ -164,7 +164,7 @@ def test_deterministic_log_prior(self):
             Deterministic("d", 2 * x)
             Normal("y", x, observed=[0, 1, 2])
 
-            idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)}))
+            idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}})
             res = compute_log_prior(idata)
 
         assert res is idata
@@ -183,7 +183,7 @@ def test_compilation_kwargs(self):
             Deterministic("d", 2 * x)
             Normal("y", x, observed=[0, 1, 2])
 
-            idata = InferenceData(posterior=dict_to_dataset({"x": np.arange(100).reshape(4, 25)}))
+            idata = from_dict({"posterior": {"x": np.arange(100).reshape(4, 25)}})
             with (
                 # apply_function_over_dataset fails with patched `compile_pymc`
                 patch("pymc.stats.log_density.apply_function_over_dataset"),