Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/source/analysis/pandas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,17 @@ One can also combine all iterations in a single dataframe like this:
# like before but with a new column "iteration" and all particles
print(df)

Additionally, one can add openPMD particle species attributes, e.g.,
from the `ED-PIC <https://github.com/openPMD/openPMD-standard/blob/1.1.0/EXT_ED-PIC.md#particle-records-macroparticles>`__ extension
or `custom code properties <https://impactx.readthedocs.io/en/25.11/dataanalysis/dataanalysis.html#additional-beam-attributes>`__
as extra dataframe columns:

.. code-block:: python

df = s.to_df("electrons", attributes=["s_ref"])

# like before but with a new column "s_ref"
print(df)

.. _analysis-pandas-ascii:

Expand Down
15 changes: 8 additions & 7 deletions examples/11_particle_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,27 @@
s = io.Series("../samples/git-sample/data%T.h5", io.Access.read_only)
electrons = s.snapshots()[400].particles["electrons"]

# all particles
df = electrons.to_df()
# all particles, extra column for "particleShape" attribute
# (from ED-PIC extension)
df = electrons.to_df(attributes=["particleShape"])
print(type(df) is pd.DataFrame)
print(df)

# only first 100 particles
df = electrons.to_df(np.s_[:100])
df = electrons.to_df(slice=np.s_[:100])
print(df)

# all particles over all steps
df = s.to_df("electrons")
df = s.to_df("electrons", attributes=["particleShape"])
print(df)

if found_cudf:
# all particles - to GPU
cdf = cudf.from_pandas(electrons.to_df())
cdf = cudf.from_pandas(electrons.to_df(attributes=["particleShape"]))
print(cdf)

# all particles over all steps - to GPU
cdf = s.to_cudf("electrons")
cdf = s.to_cudf("electrons", attributes=["particleShape"])
print(cdf)

# Particles
Expand All @@ -67,7 +68,7 @@
# pickle capabilities, so we test this here:
dask.config.set(scheduler='processes')

df = electrons.to_dask()
df = electrons.to_dask(attributes=["particleShape"])
print(df)

# check chunking of a variable
Expand Down
13 changes: 9 additions & 4 deletions src/binding/python/openpmd_api/DaskDataFrame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,22 @@
import numpy as np


def read_chunk_to_df(species, chunk):
def read_chunk_to_df(species, chunk, attributes=None):
stride = np.s_[chunk.offset[0]:chunk.offset[0]+chunk.extent[0]]
return species.to_df(stride)
return species.to_df(attributes=attributes, slice=stride)


def particles_to_daskdataframe(particle_species):
def particles_to_daskdataframe(particle_species, attributes=None):
"""
Load all records of a particle species into a Dask DataFrame.

Parameters
----------
particle_species : openpmd_api.ParticleSpecies
A ParticleSpecies class in openPMD-api.
attributes : list of strings, optional
A list of attributes of the particle_species that should be read and
added as extra columns.

Returns
-------
Expand Down Expand Up @@ -83,7 +86,9 @@ def particles_to_daskdataframe(particle_species):

# merge DataFrames
dfs = [
delayed(read_chunk_to_df)(particle_species, chunk) for chunk in chunks
delayed(read_chunk_to_df)(
particle_species, chunk=chunk, attributes=attributes
) for chunk in chunks
]
df = dd.from_delayed(dfs)

Expand Down
42 changes: 37 additions & 5 deletions src/binding/python/openpmd_api/DataFrame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,22 @@
import numpy as np


def particles_to_dataframe(particle_species, slice=None):
def particles_to_dataframe(particle_species,
*legacy_args,
attributes=None,
slice=None):
"""
Load all records of a particle species into a Pandas DataFrame.

Parameters
----------
particle_species : openpmd_api.ParticleSpecies
A ParticleSpecies class in openPMD-api.
legacy_args : tuple
DO NOT USE. Catch-all for legacy, unnamed arguments.
attributes : list of strings, optional
A list of attributes of the particle_species that should be read and
added as extra columns.
slice : np.s_, optional
A numpy slice that can be used to load only a sub-selection of
particles.
Expand All @@ -40,6 +48,20 @@ def particles_to_dataframe(particle_species, slice=None):
are optimal arguments for the slice parameter
pandas.DataFrame : the central dataframe object created here
"""
# backwards compatibility: in openPMD-api 0.17+, we added the
# additional "attributes" argument and moved slice= to the end.
if legacy_args:
if attributes is None and slice is None and len(legacy_args) == 1:
slice = legacy_args[0]
import warnings
warnings.warn("The to_df() argument order changed in "
"openPMD-api 0.17.0!\nThe slice "
"argument must be passed as a named argument.",
DeprecationWarning
)
else:
raise RuntimeError("to_df() does not support unnamed arguments!")

# import pandas here for a lazy import
try:
import pandas as pd
Expand Down Expand Up @@ -69,14 +91,18 @@ def particles_to_dataframe(particle_species, slice=None):

df = pd.DataFrame(columns)

if attributes is not None:
for attribute in attributes:
df[attribute] = particle_species.get_attribute(attribute)

# set a header for the first column (row index)
# note: this is NOT the particle id
df.index.name = "row"

return df


def iterations_to_dataframe(series, species_name):
def iterations_to_dataframe(series, species_name, attributes=None):
"""
Load all iterations of a particle species into a Pandas DataFrame.

Expand All @@ -86,6 +112,9 @@ def iterations_to_dataframe(series, species_name):
A Series class in openPMD-api.
species_name : string
The name of a particle species.
attributes : list of strings, optional
A list of attributes of the particle_species that should be read and
added as extra columns.

Returns
-------
Expand Down Expand Up @@ -115,7 +144,7 @@ def iterations_to_dataframe(series, species_name):
(
iteration
.particles[species_name]
.to_df()
.to_df(attributes=attributes)
.assign(iteration=i)
for i, iteration in series.snapshots().items()
),
Expand All @@ -126,7 +155,7 @@ def iterations_to_dataframe(series, species_name):
return df


def iterations_to_cudf(series, species_name):
def iterations_to_cudf(series, species_name, attributes=None):
"""
Load all iterations of a particle species into a cuDF DataFrame.

Expand All @@ -136,6 +165,9 @@ def iterations_to_cudf(series, species_name):
A Series class in openPMD-api.
species_name : string
The name of a particle species.
attributes : list of strings, optional
A list of attributes of the particle_species that should be read and
added as extra columns.

Returns
-------
Expand Down Expand Up @@ -172,7 +204,7 @@ def iterations_to_cudf(series, species_name):
cudf.from_pandas(
iteration
.particles[species_name]
.to_df()
.to_df(attributes=attributes)
.assign(iteration=i)
)
for i, iteration in series.snapshots().items()
Expand Down
Loading