diff --git a/doc/modules.rst b/doc/modules.rst index cfc49e67..627ba9d8 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -16,6 +16,7 @@ API Reference petab.v1.core petab.v1.distributions petab.v1.lint + petab.v1.math petab.v1.measurements petab.v1.models petab.v1.observables diff --git a/petab/v1/lint.py b/petab/v1/lint.py index b2260b83..e14289fb 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -53,6 +53,9 @@ "observable_table_has_nontrivial_noise_formula", ] +#: Regular expression pattern for valid PEtab IDs +_petab_id_pattern = re.compile(r"^[a-zA-Z_]\w*$") + def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: """Check if given columns are present in DataFrame @@ -1041,10 +1044,13 @@ def assert_model_parameters_in_condition_or_parameter_table( mapping_df[MODEL_ENTITY_ID], strict=True, ) - # mapping table entities mapping to already allowed parameters - if to_id in allowed_in_condition_cols - # mapping table entities mapping to species - or model.is_state_variable(to_id) + if not pd.isna(to_id) + and ( + # mapping table entities mapping to already allowed parameters + to_id in allowed_in_condition_cols + # mapping table entities mapping to species + or model.is_state_variable(to_id) + ) } allowed_in_parameter_table = ( @@ -1186,7 +1192,7 @@ def is_valid_identifier(x: str) -> bool: if pd.isna(x): return False - return re.match(r"^[a-zA-Z_]\w*$", x) is not None + return _petab_id_pattern.match(x) is not None def check_ids(ids: Iterable[str], kind: str = "") -> None: diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py index cc81a000..8ef1a129 100644 --- a/petab/v1/math/sympify.py +++ b/petab/v1/math/sympify.py @@ -15,6 +15,11 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: """Convert PEtab math expression to sympy expression. + .. note:: + + All symbols in the returned expression will have the `real=True` + assumption. + Args: expr: PEtab math expression. @@ -26,14 +31,22 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: The sympy expression corresponding to `expr`. Boolean values are converted to numeric values. """ + if isinstance(expr, sp.Expr): + # TODO: check if only PEtab-compatible symbols and functions are used + return expr + if isinstance(expr, int) or isinstance(expr, np.integer): return sp.Integer(expr) if isinstance(expr, float) or isinstance(expr, np.floating): return sp.Float(expr) - # Set error listeners - input_stream = InputStream(expr) + try: + input_stream = InputStream(expr) + except TypeError as e: + raise TypeError(f"Error parsing {expr!r}: {e.args[0]}") from e + lexer = PetabMathExprLexer(input_stream) + # Set error listeners lexer.removeErrorListeners() lexer.addErrorListener(MathErrorListener()) diff --git a/petab/v2/C.py b/petab/v2/C.py index 617977c1..c94a1d29 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -125,28 +125,14 @@ #: Condition ID column in the condition table CONDITION_ID = "conditionId" -# TODO: removed? -#: Condition name column in the condition table -CONDITION_NAME = "conditionName" #: Column in the condition table with the ID of an entity that is changed TARGET_ID = "targetId" -#: Column in the condition table with the operation type -OPERATION_TYPE = "operationType" #: Column in the condition table with the new value of the target entity TARGET_VALUE = "targetValue" -# operation types: -OT_CUR_VAL = "setCurrentValue" -OT_NO_CHANGE = "noChange" - -OPERATION_TYPES = [ - OT_CUR_VAL, - OT_NO_CHANGE, -] CONDITION_DF_COLS = [ CONDITION_ID, TARGET_ID, - OPERATION_TYPE, TARGET_VALUE, ] @@ -161,25 +147,25 @@ # OBSERVABLES -#: Observable name column in the observables table +#: Observable name column in the observable table OBSERVABLE_NAME = "observableName" -#: Observable formula column in the observables table +#: Observable formula column in the observable table OBSERVABLE_FORMULA = "observableFormula" -#: Noise formula column in the observables table +#: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" -#: Observable transformation column in the observables table +#: Observable transformation column in the observable table OBSERVABLE_TRANSFORMATION = "observableTransformation" -#: Noise distribution column in the observables table +#: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" -#: Mandatory columns of observables table +#: Mandatory columns of observable table OBSERVABLE_DF_REQUIRED_COLS = [ OBSERVABLE_ID, OBSERVABLE_FORMULA, NOISE_FORMULA, ] -#: Optional columns of observables table +#: Optional columns of observable table OBSERVABLE_DF_OPTIONAL_COLS = [ OBSERVABLE_NAME, OBSERVABLE_TRANSFORMATION, @@ -382,6 +368,9 @@ PETAB_ENTITY_ID = "petabEntityId" #: Model entity ID column in the mapping table MODEL_ENTITY_ID = "modelEntityId" +#: Arbitrary name +NAME = "name" + #: Required columns of the mapping table MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] @@ -389,7 +378,7 @@ #: Simulated value column in the simulation table SIMULATION = "simulation" -#: Residual value column in the residuals table +#: Residual value column in the residual table RESIDUAL = "residual" #: ??? NOISE_VALUE = "noiseValue" diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 4d147828..4f8d28ea 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -5,27 +5,31 @@ from warnings import warn -# TODO: remove v1 star imports -from ..v1.calculate import * # noqa: F403, F401, E402 -from ..v1.composite_problem import * # noqa: F403, F401, E402 -from ..v1.core import * # noqa: F403, F401, E402 -from ..v1.format_version import __format_version__ # noqa: F401, E402 -from ..v1.mapping import * # noqa: F403, F401, E402 -from ..v1.measurements import * # noqa: F403, F401, E402 -from ..v1.observables import * # noqa: F403, F401, E402 -from ..v1.parameter_mapping import * # noqa: F403, F401, E402 -from ..v1.parameters import * # noqa: F403, F401, E402 -from ..v1.sampling import * # noqa: F403, F401, E402 -from ..v1.sbml import * # noqa: F403, F401, E402 -from ..v1.simulate import * # noqa: F403, F401, E402 -from ..v1.yaml import * # noqa: F403, F401, E402 - warn( "Support for PEtab2.0 and all of petab.v2 is experimental " "and subject to changes!", stacklevel=1, ) +# TODO: move this module to v2 +from petab.v1.mapping import ( # noqa: F403, F401, E402 + get_mapping_df, + write_mapping_df, +) +from petab.v1.measurements import ( # noqa: F401, E402 + get_measurement_df, + write_measurement_df, +) +from petab.v1.observables import ( # noqa: F401, E402 + get_observable_df, + write_observable_df, +) +from petab.v1.parameters import ( # noqa: F401, E402 + get_parameter_df, + write_parameter_df, +) +from petab.v1.yaml import load_yaml # noqa: F401, E402 + # import after v1 from ..version import __version__ # noqa: F401, E402 from . import ( # noqa: F401, E402 @@ -38,5 +42,5 @@ write_experiment_df, ) from .lint import lint_problem # noqa: F401, E402 -from .models import Model # noqa: F401, E402 -from .problem import Problem # noqa: F401, E402 +from .models import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, Model # noqa: F401, E402 +from .problem import Problem, ProblemConfig # noqa: F401, E402 diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 8d5a3067..deea1a0c 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -5,12 +5,8 @@ from pathlib import Path import pandas as pd -import sympy as sp -from .. import v2 -from ..v1.math import sympify_petab -from .C import * -from .lint import assert_no_leading_trailing_whitespace +from ..v1.lint import assert_no_leading_trailing_whitespace __all__ = [ "get_condition_df", @@ -50,19 +46,3 @@ def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None: """ df = get_condition_df(df) df.to_csv(filename, sep="\t", index=False) - - -def get_condition_table_free_symbols(problem: v2.Problem) -> set[sp.Basic]: - """Free symbols from condition table assignments. - - Collects all free symbols from the condition table `targetValue` column. - - :returns: Set of free symbols. - """ - if problem.condition_df is None: - return set() - - free_symbols = set() - for target_value in problem.condition_df[TARGET_VALUE]: - free_symbols |= sympify_petab(target_value).free_symbols - return free_symbols diff --git a/petab/v2/core.py b/petab/v2/core.py index 3c23f652..10088b62 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,37 +2,43 @@ from __future__ import annotations +import re +from collections.abc import Sequence from enum import Enum +from itertools import chain from pathlib import Path +from typing import Annotated, Literal import numpy as np import pandas as pd import sympy as sp from pydantic import ( + AfterValidator, BaseModel, + BeforeValidator, ConfigDict, Field, ValidationInfo, field_validator, model_validator, ) +from typing_extensions import Self from ..v1.lint import is_valid_identifier from ..v1.math import sympify_petab -from . import C +from . import C, get_observable_df __all__ = [ "Observable", - "ObservablesTable", + "ObservableTable", "ObservableTransformation", "NoiseDistribution", "Change", - "ChangeSet", - "ConditionsTable", - "OperationType", + "Condition", + "ConditionTable", "ExperimentPeriod", "Experiment", - "ExperimentsTable", + "ExperimentTable", "Measurement", "MeasurementTable", "Mapping", @@ -43,6 +49,43 @@ ] +def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != -np.inf: + raise ValueError( + f"{info.field_name} value must be finite or -inf but got {v}" + ) + return v + + +def _is_finite_or_pos_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != np.inf: + raise ValueError( + f"{info.field_name} value must be finite or inf but got {v}" + ) + return v + + +def _not_nan(v: float, info: ValidationInfo) -> float: + if np.isnan(v): + raise ValueError(f"{info.field_name} value must not be nan.") + return v + + +def _convert_nan_to_none(v): + if isinstance(v, float) and np.isnan(v): + return None + return v + + +def _valid_petab_id(v: str) -> str: + """Field validator for PEtab IDs.""" + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + class ObservableTransformation(str, Enum): """Observable transformation types. @@ -116,7 +159,9 @@ class Observable(BaseModel): """Observable definition.""" #: Observable ID. - id: str = Field(alias=C.OBSERVABLE_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.OBSERVABLE_ID + ) #: Observable name. name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) #: Observable formula. @@ -132,14 +177,10 @@ class Observable(BaseModel): alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" + ) @field_validator( "name", @@ -166,13 +207,40 @@ def _sympify(cls, v): return sympify_petab(v) - #: :meta private: - model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True - ) - - -class ObservablesTable(BaseModel): + def _placeholders( + self, type_: Literal["observable", "noise"] + ) -> set[sp.Symbol]: + formula = ( + self.formula + if type_ == "observable" + else self.noise_formula + if type_ == "noise" + else None + ) + if formula is None or formula.is_number: + return set() + + if not (free_syms := formula.free_symbols): + return set() + + # TODO: add field validator to check for 1-based consecutive numbering + t = f"{re.escape(type_)}Parameter" + o = re.escape(self.id) + pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") + return {s for s in free_syms if pattern.match(str(s))} + + @property + def observable_placeholders(self) -> set[sp.Symbol]: + """Placeholder symbols for the observable formula.""" + return self._placeholders("observable") + + @property + def noise_placeholders(self) -> set[sp.Symbol]: + """Placeholder symbols for the noise formula.""" + return self._placeholders("noise") + + +class ObservableTable(BaseModel): """PEtab observables table.""" #: List of observables. @@ -186,11 +254,12 @@ def __getitem__(self, observable_id: str) -> Observable: raise KeyError(f"Observable ID {observable_id} not found") @classmethod - def from_df(cls, df: pd.DataFrame) -> ObservablesTable: + def from_df(cls, df: pd.DataFrame) -> ObservableTable: """Create an ObservablesTable from a DataFrame.""" if df is None: return cls(observables=[]) + df = get_observable_df(df) observables = [ Observable(**row.to_dict()) for _, row in df.reset_index().iterrows() @@ -200,10 +269,31 @@ def from_df(cls, df: pd.DataFrame) -> ObservablesTable: def to_df(self) -> pd.DataFrame: """Convert the ObservablesTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["observables"]) + records = self.model_dump(by_alias=True)["observables"] + for record in records: + obs = record[C.OBSERVABLE_FORMULA] + noise = record[C.NOISE_FORMULA] + record[C.OBSERVABLE_FORMULA] = ( + None + if obs is None + # TODO: we need a custom printer for sympy expressions + # to avoid '**' + # https://github.com/PEtab-dev/libpetab-python/issues/362 + else str(obs) + if not obs.is_number + else float(obs) + ) + record[C.NOISE_FORMULA] = ( + None + if noise is None + else str(noise) + if not noise.is_number + else float(noise) + ) + return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) @classmethod - def from_tsv(cls, file_path: str | Path) -> ObservablesTable: + def from_tsv(cls, file_path: str | Path) -> ObservableTable: """Create an ObservablesTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -211,15 +301,15 @@ def from_tsv(cls, file_path: str | Path) -> ObservablesTable: def to_tsv(self, file_path: str | Path) -> None: """Write the ObservablesTable to a TSV file.""" df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) + df.to_csv(file_path, sep="\t", index=True) - def __add__(self, other: Observable) -> ObservablesTable: + def __add__(self, other: Observable) -> ObservableTable: """Add an observable to the table.""" if not isinstance(other, Observable): raise TypeError("Can only add Observable to ObservablesTable") - return ObservablesTable(observables=self.observables + [other]) + return ObservableTable(observables=self.observables + [other]) - def __iadd__(self, other: Observable) -> ObservablesTable: + def __iadd__(self, other: Observable) -> ObservableTable: """Add an observable to the table in place.""" if not isinstance(other, Observable): raise TypeError("Can only add Observable to ObservablesTable") @@ -227,58 +317,34 @@ def __iadd__(self, other: Observable) -> ObservablesTable: return self -# TODO remove?! -class OperationType(str, Enum): - """Operation types for model changes in the PEtab conditions table.""" - - # TODO update names - SET_CURRENT_VALUE = "setCurrentValue" - NO_CHANGE = "noChange" - ... - - class Change(BaseModel): """A change to the model or model state. A change to the model or model state, corresponding to an individual - row of the PEtab conditions table. + row of the PEtab condition table. >>> Change( ... target_id="k1", - ... operation_type=OperationType.SET_CURRENT_VALUE, ... target_value="10", ... ) # doctest: +NORMALIZE_WHITESPACE - Change(target_id='k1', operation_type='setCurrentValue', - target_value=10.0000000000000) + Change(target_id='k1', target_value=10.0000000000000) """ #: The ID of the target entity to change. - target_id: str | None = Field(alias=C.TARGET_ID, default=None) - # TODO: remove?! - operation_type: OperationType = Field(alias=C.OPERATION_TYPE) + target_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.TARGET_ID + ) #: The value to set the target entity to. - target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) + target_value: sp.Basic = Field(alias=C.TARGET_VALUE) #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True, use_enum_values=True, + extra="allow", ) - @model_validator(mode="before") - @classmethod - def _validate_id(cls, data: dict): - if ( - data.get("operation_type", data.get(C.OPERATION_TYPE)) - != C.OT_NO_CHANGE - ): - target_id = data.get("target_id", data.get(C.TARGET_ID)) - - if not is_valid_identifier(target_id): - raise ValueError(f"Invalid ID: {target_id}") - return data - @field_validator("target_value", mode="before") @classmethod def _sympify(cls, v): @@ -290,65 +356,57 @@ def _sympify(cls, v): return sympify_petab(v) -class ChangeSet(BaseModel): +class Condition(BaseModel): """A set of changes to the model or model state. A set of simultaneously occurring changes to the model or model state, corresponding to a perturbation of the underlying system. This corresponds to all rows of the PEtab conditions table with the same condition ID. - >>> ChangeSet( + >>> Condition( ... id="condition1", ... changes=[ ... Change( ... target_id="k1", - ... operation_type=OperationType.SET_CURRENT_VALUE, ... target_value="10", ... ) ... ], ... ) # doctest: +NORMALIZE_WHITESPACE - ChangeSet(id='condition1', changes=[Change(target_id='k1', - operation_type='setCurrentValue', target_value=10.0000000000000)]) + Condition(id='condition1', + changes=[Change(target_id='k1', target_value=10.0000000000000)]) """ #: The condition ID. - id: str = Field(alias=C.CONDITION_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.CONDITION_ID + ) #: The changes associated with this condition. changes: list[Change] #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - - def __add__(self, other: Change) -> ChangeSet: + def __add__(self, other: Change) -> Condition: """Add a change to the set.""" if not isinstance(other, Change): - raise TypeError("Can only add Change to ChangeSet") - return ChangeSet(id=self.id, changes=self.changes + [other]) + raise TypeError("Can only add Change to Condition") + return Condition(id=self.id, changes=self.changes + [other]) - def __iadd__(self, other: Change) -> ChangeSet: + def __iadd__(self, other: Change) -> Condition: """Add a change to the set in place.""" if not isinstance(other, Change): - raise TypeError("Can only add Change to ChangeSet") + raise TypeError("Can only add Change to Condition") self.changes.append(other) return self -class ConditionsTable(BaseModel): +class ConditionTable(BaseModel): """PEtab conditions table.""" #: List of conditions. - conditions: list[ChangeSet] = [] + conditions: list[Condition] = [] - def __getitem__(self, condition_id: str) -> ChangeSet: + def __getitem__(self, condition_id: str) -> Condition: """Get a condition by ID.""" for condition in self.conditions: if condition.id == condition_id: @@ -356,29 +414,39 @@ def __getitem__(self, condition_id: str) -> ChangeSet: raise KeyError(f"Condition ID {condition_id} not found") @classmethod - def from_df(cls, df: pd.DataFrame) -> ConditionsTable: + def from_df(cls, df: pd.DataFrame) -> ConditionTable: """Create a ConditionsTable from a DataFrame.""" - if df is None: + if df is None or df.empty: return cls(conditions=[]) conditions = [] for condition_id, sub_df in df.groupby(C.CONDITION_ID): - changes = [Change(**row.to_dict()) for _, row in sub_df.iterrows()] - conditions.append(ChangeSet(id=condition_id, changes=changes)) + changes = [Change(**row) for row in sub_df.to_dict("records")] + conditions.append(Condition(id=condition_id, changes=changes)) return cls(conditions=conditions) def to_df(self) -> pd.DataFrame: """Convert the ConditionsTable to a DataFrame.""" records = [ - {C.CONDITION_ID: condition.id, **change.model_dump()} + {C.CONDITION_ID: condition.id, **change.model_dump(by_alias=True)} for condition in self.conditions for change in condition.changes ] - return pd.DataFrame(records) + for record in records: + record[C.TARGET_VALUE] = ( + float(record[C.TARGET_VALUE]) + if record[C.TARGET_VALUE].is_number + else str(record[C.TARGET_VALUE]) + ) + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.CONDITION_DF_REQUIRED_COLS) + ) @classmethod - def from_tsv(cls, file_path: str | Path) -> ConditionsTable: + def from_tsv(cls, file_path: str | Path) -> ConditionTable: """Create a ConditionsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -388,19 +456,36 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=False) - def __add__(self, other: ChangeSet) -> ConditionsTable: + def __add__(self, other: Condition) -> ConditionTable: """Add a condition to the table.""" - if not isinstance(other, ChangeSet): - raise TypeError("Can only add ChangeSet to ConditionsTable") - return ConditionsTable(conditions=self.conditions + [other]) + if not isinstance(other, Condition): + raise TypeError("Can only add Conditions to ConditionsTable") + return ConditionTable(conditions=self.conditions + [other]) - def __iadd__(self, other: ChangeSet) -> ConditionsTable: + def __iadd__(self, other: Condition) -> ConditionTable: """Add a condition to the table in place.""" - if not isinstance(other, ChangeSet): - raise TypeError("Can only add ChangeSet to ConditionsTable") + if not isinstance(other, Condition): + raise TypeError("Can only add Conditions to ConditionsTable") self.conditions.append(other) return self + @property + def free_symbols(self) -> set[sp.Symbol]: + """Get all free symbols in the conditions table. + + This includes all free symbols in the target values of the changes, + independently of whether it is referenced by any experiment, or + (indirectly) by any measurement. + """ + return set( + chain.from_iterable( + change.target_value.free_symbols + for condition in self.conditions + for change in condition.changes + if change.target_value is not None + ) + ) + class ExperimentPeriod(BaseModel): """A period of a timecourse or experiment defined by a start time @@ -410,18 +495,20 @@ class ExperimentPeriod(BaseModel): """ #: The start time of the period in time units as defined in the model. - start: float = Field(alias=C.TIME) + time: Annotated[float, AfterValidator(_is_finite_or_neg_inf)] = Field( + alias=C.TIME + ) #: The ID of the condition to be applied at the start time. - condition_id: str = Field(alias=C.CONDITION_ID) + condition_id: str | None = Field(alias=C.CONDITION_ID, default=None) #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") - @field_validator("condition_id") + @field_validator("condition_id", mode="before") @classmethod def _validate_id(cls, condition_id): - if not condition_id: - raise ValueError("ID must not be empty.") + if pd.isna(condition_id) or not condition_id: + return None if not is_valid_identifier(condition_id): raise ValueError(f"Invalid ID: {condition_id}") return condition_id @@ -436,24 +523,17 @@ class Experiment(BaseModel): """ #: The experiment ID. - id: str = Field(alias=C.EXPERIMENT_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.EXPERIMENT_ID + ) #: The periods of the experiment. periods: list[ExperimentPeriod] = [] #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - def __add__(self, other: ExperimentPeriod) -> Experiment: """Add a period to the experiment.""" if not isinstance(other, ExperimentPeriod): @@ -468,14 +548,14 @@ def __iadd__(self, other: ExperimentPeriod) -> Experiment: return self -class ExperimentsTable(BaseModel): +class ExperimentTable(BaseModel): """PEtab experiments table.""" #: List of experiments. experiments: list[Experiment] @classmethod - def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: + def from_df(cls, df: pd.DataFrame) -> ExperimentTable: """Create an ExperimentsTable from a DataFrame.""" if df is None: return cls(experiments=[]) @@ -484,7 +564,7 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): periods = [ ExperimentPeriod( - start=row[C.TIME], condition_id=row[C.CONDITION_ID] + time=row[C.TIME], condition_id=row[C.CONDITION_ID] ) for _, row in cur_exp_df.iterrows() ] @@ -494,10 +574,22 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: def to_df(self) -> pd.DataFrame: """Convert the ExperimentsTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["experiments"]) + records = [ + { + C.EXPERIMENT_ID: experiment.id, + **period.model_dump(by_alias=True), + } + for experiment in self.experiments + for period in experiment.periods + ] + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.EXPERIMENT_DF_REQUIRED_COLS) + ) @classmethod - def from_tsv(cls, file_path: str | Path) -> ExperimentsTable: + def from_tsv(cls, file_path: str | Path) -> ExperimentTable: """Create an ExperimentsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -507,19 +599,26 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=False) - def __add__(self, other: Experiment) -> ExperimentsTable: + def __add__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table.""" if not isinstance(other, Experiment): raise TypeError("Can only add Experiment to ExperimentsTable") - return ExperimentsTable(experiments=self.experiments + [other]) + return ExperimentTable(experiments=self.experiments + [other]) - def __iadd__(self, other: Experiment) -> ExperimentsTable: + def __iadd__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table in place.""" if not isinstance(other, Experiment): raise TypeError("Can only add Experiment to ExperimentsTable") self.experiments.append(other) return self + def __getitem__(self, item): + """Get an experiment by ID.""" + for experiment in self.experiments: + if experiment.id == item: + return experiment + raise KeyError(f"Experiment ID {item} not found") + class Measurement(BaseModel): """A measurement. @@ -533,9 +632,13 @@ class Measurement(BaseModel): #: The experiment ID. experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None) #: The time point of the measurement in time units as defined in the model. - time: float = Field(alias=C.TIME) + time: Annotated[float, AfterValidator(_is_finite_or_pos_inf)] = Field( + alias=C.TIME + ) #: The measurement value. - measurement: float = Field(alias=C.MEASUREMENT) + measurement: Annotated[float, AfterValidator(_not_nan)] = Field( + alias=C.MEASUREMENT + ) #: Values for placeholder parameters in the observable formula. observable_parameters: list[sp.Basic] = Field( alias=C.OBSERVABLE_PARAMETERS, default_factory=list @@ -547,7 +650,7 @@ class Measurement(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) @field_validator( @@ -578,12 +681,17 @@ def _validate_id(cls, v, info: ValidationInfo): ) @classmethod def _sympify_list(cls, v): + if v is None: + return [] + if isinstance(v, float) and np.isnan(v): return [] + if isinstance(v, str): v = v.split(C.PARAMETER_SEPARATOR) - else: + elif not isinstance(v, Sequence): v = [v] + return [sympify_petab(x) for x in v] @@ -613,7 +721,16 @@ def from_df( def to_df(self) -> pd.DataFrame: """Convert the MeasurementTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["measurements"]) + records = self.model_dump(by_alias=True)["measurements"] + for record in records: + record[C.OBSERVABLE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.OBSERVABLE_PARAMETERS]) + ) + record[C.NOISE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.NOISE_PARAMETERS]) + ) + + return pd.DataFrame(records) @classmethod def from_tsv(cls, file_path: str | Path) -> MeasurementTable: @@ -644,23 +761,20 @@ class Mapping(BaseModel): """Mapping PEtab entities to model entities.""" #: PEtab entity ID. - petab_id: str = Field(alias=C.PETAB_ENTITY_ID) + petab_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.PETAB_ENTITY_ID + ) #: Model entity ID. - model_id: str = Field(alias=C.MODEL_ENTITY_ID) + model_id: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = ( + Field(alias=C.MODEL_ENTITY_ID, default=None) + ) + #: Arbitrary name + name: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.NAME, default=None + ) #: :meta private: - model_config = ConfigDict(populate_by_name=True) - - @field_validator( - "petab_id", - ) - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v + model_config = ConfigDict(populate_by_name=True, extra="allow") class MappingTable(BaseModel): @@ -683,7 +797,12 @@ def from_df(cls, df: pd.DataFrame) -> MappingTable: def to_df(self) -> pd.DataFrame: """Convert the MappingTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["mappings"]) + res = ( + pd.DataFrame(self.model_dump(by_alias=True)["mappings"]) + if self.mappings + else pd.DataFrame(columns=C.MAPPING_DF_REQUIRED_COLS) + ) + return res.set_index([C.PETAB_ENTITY_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> MappingTable: @@ -709,6 +828,20 @@ def __iadd__(self, other: Mapping) -> MappingTable: self.mappings.append(other) return self + def __getitem__(self, petab_id: str) -> Mapping: + """Get a mapping by PEtab ID.""" + for mapping in self.mappings: + if mapping.petab_id == petab_id: + return mapping + raise KeyError(f"PEtab ID {petab_id} not found") + + def get(self, petab_id, default=None): + """Get a mapping by PEtab ID or return a default value.""" + try: + return self[petab_id] + except KeyError: + return default + class Parameter(BaseModel): """Parameter definition.""" @@ -722,18 +855,24 @@ class Parameter(BaseModel): #: Nominal value. nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) #: Parameter scale. + # TODO: keep or remove? scale: ParameterScale = Field( alias=C.PARAMETER_SCALE, default=ParameterScale.LIN ) + # TODO: change to bool in PEtab, or serialize as 0/1? + # https://github.com/PEtab-dev/PEtab/discussions/610 #: Is the parameter to be estimated? estimate: bool = Field(alias=C.ESTIMATE, default=True) + # TODO priors + # pydantic vs. petab.v1.priors.Prior #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True, use_enum_values=True, + extra="allow", ) @field_validator("id") @@ -745,6 +884,28 @@ def _validate_id(cls, v): raise ValueError(f"Invalid ID: {v}") return v + @field_validator("estimate", mode="before") + @classmethod + def _validate_estimate_before(cls, v): + if isinstance(v, bool): + return v + + # FIXME: grace period for 0/1 values until the test suite was updated + if v in [0, 1, "0", "1"]: + return bool(int(v)) + + # TODO: clarify whether extra whitespace is allowed + if isinstance(v, str): + v = v.strip().lower() + if v == "true": + return True + if v == "false": + return False + + raise ValueError( + f"Invalid value for estimate: {v}. Must be `true` or `false`." + ) + @field_validator("lb", "ub", "nominal_value") @classmethod def _convert_nan_to_none(cls, v): @@ -752,6 +913,33 @@ def _convert_nan_to_none(cls, v): return None return v + @model_validator(mode="after") + def _validate(self) -> Self: + if not self.estimate and self.nominal_value is None: + raise ValueError( + "Non-estimated parameter must have a nominal value" + ) + + if self.estimate and (self.lb is None or self.ub is None): + raise ValueError( + "Estimated parameter must have lower and upper bounds set" + ) + + # TODO: also if not estimated? + if ( + self.estimate + and self.lb is not None + and self.ub is not None + and self.lb >= self.ub + ): + raise ValueError("Lower bound must be less than upper bound.") + + # TODO parameterScale? + + # TODO priorType, priorParameters + + return self + class ParameterTable(BaseModel): """PEtab parameter table.""" @@ -774,7 +962,9 @@ def from_df(cls, df: pd.DataFrame) -> ParameterTable: def to_df(self) -> pd.DataFrame: """Convert the ParameterTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["parameters"]) + return pd.DataFrame( + self.model_dump(by_alias=True)["parameters"] + ).set_index([C.PARAMETER_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> ParameterTable: @@ -799,3 +989,15 @@ def __iadd__(self, other: Parameter) -> ParameterTable: raise TypeError("Can only add Parameter to ParameterTable") self.parameters.append(other) return self + + def __getitem__(self, item) -> Parameter: + """Get a parameter by ID.""" + for parameter in self.parameters: + if parameter.id == item: + return parameter + raise KeyError(f"Parameter ID {item} not found") + + @property + def n_estimated(self) -> int: + """Number of estimated parameters.""" + return sum(p.estimate for p in self.parameters) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2deb0ebd..71d655dd 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -4,42 +4,15 @@ import logging from abc import ABC, abstractmethod -from collections import OrderedDict +from collections import Counter, OrderedDict from collections.abc import Set from dataclasses import dataclass, field from enum import IntEnum from pathlib import Path -import numpy as np import pandas as pd +import sympy as sp -from .. import v2 -from ..v1.lint import ( - _check_df, - assert_measured_observables_defined, - assert_measurements_not_null, - assert_measurements_numeric, - assert_model_parameters_in_condition_or_parameter_table, - assert_no_leading_trailing_whitespace, - assert_parameter_bounds_are_numeric, - assert_parameter_estimate_is_boolean, - assert_parameter_id_is_string, - assert_parameter_prior_parameters_are_valid, - assert_parameter_prior_type_is_valid, - assert_parameter_scale_is_valid, - assert_unique_observable_ids, - assert_unique_parameter_ids, - check_ids, - check_observable_df, - check_parameter_bounds, -) -from ..v1.measurements import ( - assert_overrides_match_parameter_count, - split_parameter_replacement_list, -) -from ..v1.observables import get_output_parameters, get_placeholders -from ..v1.visualize.lint import validate_visualization_df -from ..v2.C import * from .problem import Problem logger = logging.getLogger(__name__) @@ -51,17 +24,18 @@ "ValidationError", "ValidationTask", "CheckModel", - "CheckTableExists", - "CheckValidPetabIdColumn", - "CheckMeasurementTable", - "CheckConditionTable", - "CheckObservableTable", - "CheckParameterTable", + "CheckProblemConfig", + "CheckPosLogMeasurements", + "CheckValidConditionTargets", + "CheckUniquePrimaryKeys", "CheckExperimentTable", "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", "CheckVisualizationTable", + "CheckUnusedExperiments", + "CheckObservablesDoNotShadowModelEntities", + "CheckUnusedConditions", "lint_problem", "default_validation_tasks", ] @@ -91,6 +65,7 @@ class ValidationIssue: level: ValidationIssueSeverity message: str + task: str | None = None def __post_init__(self): if not isinstance(self.level, ValidationIssueSeverity): @@ -101,6 +76,18 @@ def __post_init__(self): def __str__(self): return f"{self.level.name}: {self.message}" + def _get_task_name(self): + """Get the name of the ValidationTask that raised this error.""" + import inspect + + # walk up the stack until we find the ValidationTask.run method + for frame_info in inspect.stack(): + frame = frame_info.frame + if "self" in frame.f_locals: + task = frame.f_locals["self"] + if isinstance(task, ValidationTask): + return task.__class__.__name__ + @dataclass class ValidationError(ValidationIssue): @@ -109,23 +96,23 @@ class ValidationError(ValidationIssue): level: ValidationIssueSeverity = field( default=ValidationIssueSeverity.ERROR, init=False ) - task: str | None = None def __post_init__(self): if self.task is None: self.task = self._get_task_name() - def _get_task_name(self): - """Get the name of the ValidationTask that raised this error.""" - import inspect - # walk up the stack until we find the ValidationTask.run method - for frame_info in inspect.stack(): - frame = frame_info.frame - if "self" in frame.f_locals: - task = frame.f_locals["self"] - if isinstance(task, ValidationTask): - return task.__class__.__name__ +@dataclass +class ValidationWarning(ValidationIssue): + """A validation result with level WARNING.""" + + level: ValidationIssueSeverity = field( + default=ValidationIssueSeverity.WARNING, init=False + ) + + def __post_init__(self): + if self.task is None: + self.task = self._get_task_name() class ValidationResultList(list[ValidationIssue]): @@ -139,17 +126,25 @@ def log( *, logger: logging.Logger = logger, min_level: ValidationIssueSeverity = ValidationIssueSeverity.INFO, + max_level: ValidationIssueSeverity = ValidationIssueSeverity.CRITICAL, ): - """Log the validation results.""" + """Log the validation results. + + :param logger: The logger to use for logging. + Defaults to the module logger. + :param min_level: The minimum severity level to log. + :param max_level: The maximum severity level to log. + """ for result in self: - if result.level < min_level: + if result.level < min_level or result.level > max_level: continue + msg = f"{result.level.name}: {result.message} [{result.task}]" if result.level == ValidationIssueSeverity.INFO: - logger.info(result.message) + logger.info(msg) elif result.level == ValidationIssueSeverity.WARNING: - logger.warning(result.message) + logger.warning(msg) elif result.level >= ValidationIssueSeverity.ERROR: - logger.error(result.message) + logger.error(msg) if not self: logger.info("PEtab format check completed successfully.") @@ -167,7 +162,7 @@ def lint_problem(problem: Problem | str | Path) -> ValidationResultList: Arguments: problem: PEtab problem to check. Instance of :class:`Problem` or path - to a PEtab problem yaml file. + to a PEtab problem YAML file. Returns: A list of validation results. Empty if no issues were found. """ @@ -195,6 +190,38 @@ def __call__(self, *args, **kwargs): return self.run(*args, **kwargs) +class CheckProblemConfig(ValidationTask): + """A task to validate the configuration of a PEtab problem. + + This corresponds to checking the problem YAML file semantics. + """ + + def run(self, problem: Problem) -> ValidationIssue | None: + if (config := problem.config) is None or config.base_path is None: + # This is allowed, so we can validate in-memory problems + # that don't have the list of files populated + return None + # TODO: decide when this should be emitted + # return ValidationWarning("Problem configuration is missing.") + + # TODO: we need some option for validating partial vs full problems + # check for unset but required files + missing_files = [] + if not config.parameter_file: + missing_files.append("parameters") + + if not [p.measurement_files for p in config.problems]: + missing_files.append("measurements") + + if not [p.observable_files for p in config.problems]: + missing_files.append("observables") + + if missing_files: + return ValidationError( + f"Missing files: {', '.join(missing_files)}" + ) + + class CheckModel(ValidationTask): """A task to validate the model of a PEtab problem.""" @@ -207,134 +234,133 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError("Model is invalid.") -class CheckTableExists(ValidationTask): - """A task to check if a table exists in the PEtab problem.""" - - def __init__(self, table_name: str): - if table_name not in ["measurement", "observable", "parameter"]: - # all others are optional - raise ValueError( - f"Table name {table_name} is not supported. " - "Supported table names are 'measurement', 'observable', " - "'parameter'." - ) - self.table_name = table_name +class CheckMeasuredObservablesDefined(ValidationTask): + """A task to check that all observables referenced by the measurements + are defined.""" def run(self, problem: Problem) -> ValidationIssue | None: - if getattr(problem, f"{self.table_name}_df") is None: - return ValidationError(f"{self.table_name} table is missing.") - + used_observables = { + m.observable_id for m in problem.measurement_table.measurements + } + defined_observables = { + o.id for o in problem.observable_table.observables + } + if undefined_observables := (used_observables - defined_observables): + return ValidationError( + f"Observables {undefined_observables} used in " + "measurement table but not defined in observable table." + ) -class CheckValidPetabIdColumn(ValidationTask): - """A task to check that a given column contains only valid PEtab IDs.""" - def __init__( - self, - table_name: str, - column_name: str, - required_column: bool = True, - ignore_nan: bool = False, - ): - self.table_name = table_name - self.column_name = column_name - self.required_column = required_column - self.ignore_nan = ignore_nan +class CheckOverridesMatchPlaceholders(ValidationTask): + """A task to check that the number of observable/noise parameters + in the measurements match the number of placeholders in the observables.""" def run(self, problem: Problem) -> ValidationIssue | None: - df = getattr(problem, f"{self.table_name}_df") - if df is None: - return - - if self.column_name not in df.columns: - if self.required_column: - return ValidationError( - f"Column {self.column_name} is missing in " - f"{self.table_name} table." + observable_parameters_count = { + o.id: len(o.observable_placeholders) + for o in problem.observable_table.observables + } + noise_parameters_count = { + o.id: len(o.noise_placeholders) + for o in problem.observable_table.observables + } + messages = [] + for m in problem.measurement_table.measurements: + # check observable parameters + try: + expected = observable_parameters_count[m.observable_id] + except KeyError: + messages.append( + f"Observable {m.observable_id} used in measurement " + f"table is not defined." ) - return + continue - try: - ids = df[self.column_name].values - if self.ignore_nan: - ids = ids[~pd.isna(ids)] - check_ids(ids, kind=self.column_name) - except ValueError as e: - return ValidationError(str(e)) + actual = len(m.observable_parameters) + if actual != expected: + formula = problem.observable_table[m.observable_id].formula + messages.append( + f"Mismatch of observable parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) -class CheckMeasurementTable(ValidationTask): - """A task to validate the measurement table of a PEtab problem.""" + # check noise parameters + expected = noise_parameters_count[m.observable_id] + actual = len(m.noise_parameters) + if actual != expected: + # no overrides defined, but a numerical sigma can be provided + # anyway + if len(m.noise_parameters) != 1 or ( + len(m.noise_parameters) == 1 + and m.noise_parameters[0].is_number + ): + messages.append( + "No placeholders have been specified in the " + f"noise model for observable {m.observable_id}, " + "but a parameter ID " + "or multiple overrides were specified in the " + "noiseParameters column." + ) + else: + formula = problem.observable_table[ + m.observable_id + ].noise_formula + messages.append( + f"Mismatch of noise parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.measurement_df is None: - return + if messages: + return ValidationError("\n".join(messages)) - df = problem.measurement_df - try: - _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") - for column_name in MEASUREMENT_DF_REQUIRED_COLS: - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) +class CheckPosLogMeasurements(ValidationTask): + """Check that measurements for observables with + log-transformation are positive.""" - for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df and not np.issubdtype( - df[column_name].dtype, np.number - ): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name + def run(self, problem: Problem) -> ValidationIssue | None: + from .core import ObservableTransformation as ot + + log_observables = { + o.id + for o in problem.observable_table.observables + if o.transformation in [ot.LOG, ot.LOG10] + } + if log_observables: + for m in problem.measurement_table.measurements: + if m.measurement <= 0 and m.observable_id in log_observables: + return ValidationError( + "Measurements with observable " + f"log transformation must be " + f"positive, but {m.measurement} <= 0 for {m}" ) - if problem.observable_df is not None: - assert_measured_observables_defined(df, problem.observable_df) - assert_overrides_match_parameter_count( - df, problem.observable_df - ) - if OBSERVABLE_TRANSFORMATION in problem.observable_df: - # Check for positivity of measurements in case of - # log-transformation - assert_unique_observable_ids(problem.observable_df) - # If the above is not checked, in the following loop - # trafo may become a pandas Series - for measurement, obs_id in zip( - df[MEASUREMENT], df[OBSERVABLE_ID], strict=True - ): - trafo = problem.observable_df.loc[ - obs_id, OBSERVABLE_TRANSFORMATION - ] - if measurement <= 0.0 and trafo in [LOG, LOG10]: - raise ValueError( - "Measurements with observable " - f"transformation {trafo} must be " - f"positive, but {measurement} <= 0." - ) - - assert_measurements_not_null(df) - assert_measurements_numeric(df) - except AssertionError as e: - return ValidationError(str(e)) +class CheckMeasuredExperimentsDefined(ValidationTask): + """A task to check that all experiments referenced by measurements + are defined.""" + def run(self, problem: Problem) -> ValidationIssue | None: # TODO: introduce some option for validation of partial vs full # problem. if this is supposed to be a complete problem, a missing # condition table should be an error if the measurement table refers # to conditions, otherwise it should maximally be a warning - used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values) - # handle default-experiment - used_experiments = set( - filter( - lambda x: not pd.isna(x), - used_experiments, - ) - ) + used_experiments = { + m.experiment_id + for m in problem.measurement_table.measurements + if m.experiment_id is not None + } + # check that measured experiments exist - available_experiments = ( - set(problem.experiment_df[EXPERIMENT_ID].unique()) - if problem.experiment_df is not None - else set() - ) + available_experiments = { + e.id for e in problem.experiment_table.experiments + } if missing_experiments := (used_experiments - available_experiments): return ValidationError( "Measurement table references experiments that " @@ -343,78 +369,85 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -class CheckConditionTable(ValidationTask): - """A task to validate the condition table of a PEtab problem.""" +class CheckValidConditionTargets(ValidationTask): + """Check that all condition table targets are valid.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.condition_df is None: - return - - df = problem.condition_df - - try: - _check_df(df, CONDITION_DF_REQUIRED_COLS, "condition") - check_ids(df[CONDITION_ID], kind="condition") - check_ids(df[TARGET_ID], kind="target") - except AssertionError as e: - return ValidationError(str(e)) - - # TODO: check value types - - if problem.model is None: - return - - # check targets are valid allowed_targets = set( problem.model.get_valid_ids_for_condition_table() ) - if problem.observable_df is not None: - allowed_targets |= set( - get_output_parameters( - model=problem.model, - observable_df=problem.observable_df, - mapping_df=problem.mapping_df, - ) - ) - if problem.mapping_df is not None: - allowed_targets |= set(problem.mapping_df.index.values) - invalid = set(df[TARGET_ID].unique()) - allowed_targets - if invalid: + allowed_targets |= set(get_output_parameters(problem)) + allowed_targets |= { + m.petab_id + for m in problem.mapping_table.mappings + if m.model_id is not None + } + + used_targets = { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } + + if invalid := (used_targets - allowed_targets): return ValidationError( f"Condition table contains invalid targets: {invalid}" ) - # TODO check that all value types are valid for the given targets +class CheckUniquePrimaryKeys(ValidationTask): + """Check that all primary keys are unique.""" -class CheckObservableTable(ValidationTask): - """A task to validate the observable table of a PEtab problem.""" + def run(self, problem: Problem) -> ValidationIssue | None: + # TODO: check that IDs are globally unique + # -- replaces CheckObservablesDoNotShadowModelEntities - def run(self, problem: Problem): - if problem.observable_df is None: - return + # check for uniqueness of all primary keys + counter = Counter(c.id for c in problem.condition_table.conditions) + duplicates = {id for id, count in counter.items() if count > 1} - try: - check_observable_df( - problem.observable_df, + if duplicates: + return ValidationError( + f"Condition table contains duplicate IDs: {duplicates}" ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + + counter = Counter(o.id for o in problem.observable_table.observables) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Observable table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(e.id for e in problem.experiment_table.experiments) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Experiment table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(p.id for p in problem.parameter_table.parameters) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Parameter table contains duplicate IDs: {duplicates}" ) class CheckObservablesDoNotShadowModelEntities(ValidationTask): """A task to check that observable IDs do not shadow model entities.""" + # TODO: all PEtab entity IDs must be disjoint from the model entity IDs def run(self, problem: Problem) -> ValidationIssue | None: - if problem.observable_df is None or problem.model is None: - return + if not problem.observable_table.observables or problem.model is None: + return None shadowed_entities = [ - obs_id - for obs_id in problem.observable_df.index - if problem.model.has_entity_with_id(obs_id) + o.id + for o in problem.observable_table.observables + if problem.model.has_entity_with_id(o.id) ] if shadowed_entities: return ValidationError( @@ -422,103 +455,23 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -class CheckParameterTable(ValidationTask): - """A task to validate the parameter table of a PEtab problem.""" - - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.parameter_df is None: - return - - try: - df = problem.parameter_df - _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter") - - if df.index.name != PARAMETER_ID: - return ValidationError( - f"Parameter table has wrong index {df.index.name}." - f" Expected {PARAMETER_ID}.", - ) - - check_ids(df.index.values, kind="parameter") - - for column_name in PARAMETER_DF_REQUIRED_COLS[ - 1: - ]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - # nominal value is required for non-estimated parameters - non_estimated_par_ids = list( - df.index[ - (df[ESTIMATE] != 1) - | ( - pd.api.types.is_string_dtype(df[ESTIMATE]) - and df[ESTIMATE] != "1" - ) - ] - ) - # TODO implement as validators - # `assert_has_fixed_parameter_nominal_values` - # and `assert_correct_table_dtypes` - if non_estimated_par_ids: - if NOMINAL_VALUE not in df: - return ValidationError( - "Parameter table contains parameters " - f"{non_estimated_par_ids} that are not " - "specified to be estimated, " - f"but column {NOMINAL_VALUE} is missing." - ) - try: - df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float) - except ValueError: - return ValidationError( - f"Expected numeric values for `{NOMINAL_VALUE}` " - "in parameter table " - "for all non-estimated parameters." - ) - - assert_parameter_id_is_string(df) - assert_parameter_scale_is_valid(df) - assert_parameter_bounds_are_numeric(df) - assert_parameter_estimate_is_boolean(df) - assert_unique_parameter_ids(df) - check_parameter_bounds(df) - assert_parameter_prior_type_is_valid(df) - assert_parameter_prior_parameters_are_valid(df) - - except AssertionError as e: - return ValidationError(str(e)) - - class CheckExperimentTable(ValidationTask): """A task to validate the experiment table of a PEtab problem.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.experiment_df is None: - return - - df = problem.experiment_df - - try: - _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment") - except AssertionError as e: - return ValidationError(str(e)) + messages = [] + for experiment in problem.experiment_table.experiments: + # Check that there are no duplicate timepoints + counter = Counter(period.time for period in experiment.periods) + duplicates = {time for time, count in counter.items() if count > 1} + if duplicates: + messages.append( + f"Experiment {experiment.id} contains duplicate " + f"timepoints: {duplicates}" + ) - # valid timepoints - invalid = [] - for time in df[TIME].values: - try: - time = float(time) - if not np.isfinite(time) and time != -np.inf: - invalid.append(time) - except ValueError: - invalid.append(time) - if invalid: - return ValidationError( - f"Invalid timepoints in experiment table: {invalid}" - ) + if messages: + return ValidationError("\n".join(messages)) class CheckExperimentConditionsExist(ValidationTask): @@ -526,30 +479,24 @@ class CheckExperimentConditionsExist(ValidationTask): in the condition table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.experiment_df is None: - return - - if ( - problem.condition_df is None - and problem.experiment_df is not None - and not problem.experiment_df.empty - ): - return ValidationError( - "Experiment table is non-empty, " - "but condition table is missing." - ) + messages = [] + available_conditions = { + c.id for c in problem.condition_table.conditions + } + for experiment in problem.experiment_table.experiments: + missing_conditions = { + period.condition_id + for period in experiment.periods + if period.condition_id is not None + } - available_conditions + if missing_conditions: + messages.append( + f"Experiment {experiment.id} requires conditions that are " + f"not present in the condition table: {missing_conditions}" + ) - required_conditions = problem.experiment_df[CONDITION_ID].unique() - existing_conditions = problem.condition_df[CONDITION_ID].unique() - - missing_conditions = set(required_conditions) - set( - existing_conditions - ) - if missing_conditions: - return ValidationError( - f"Experiment table contains conditions that are not present " - f"in the condition table: {missing_conditions}" - ) + if messages: + return ValidationError("\n".join(messages)) class CheckAllParametersPresentInParameterTable(ValidationTask): @@ -557,34 +504,25 @@ class CheckAllParametersPresentInParameterTable(ValidationTask): with no additional ones.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.parameter_df is None - or problem.observable_df is None - or problem.measurement_df is None - ): - return + if problem.model is None: + return None required = get_required_parameters_for_parameter_table(problem) allowed = get_valid_parameters_for_parameter_table(problem) - actual = set(problem.parameter_df.index) + actual = {p.id for p in problem.parameter_table.parameters} missing = required - actual extraneous = actual - allowed # missing parameters might be present under a different name based on # the mapping table - if missing and problem.mapping_df is not None: + if missing: model_to_petab_mapping = {} - for map_from, map_to in zip( - problem.mapping_df.index.values, - problem.mapping_df[MODEL_ENTITY_ID], - strict=True, - ): - if map_to in model_to_petab_mapping: - model_to_petab_mapping[map_to].append(map_from) + for m in problem.mapping_table.mappings: + if m.model_id in model_to_petab_mapping: + model_to_petab_mapping[m.model_id].append(m.petab_id) else: - model_to_petab_mapping[map_to] = [map_from] + model_to_petab_mapping[m.model_id] = [m.petab_id] missing = { missing_id for missing_id in missing @@ -613,23 +551,116 @@ class CheckValidParameterInConditionOrParameterTable(ValidationTask): present in the condition or parameter table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.condition_df is None - or problem.parameter_df is None - ): - return - - try: - assert_model_parameters_in_condition_or_parameter_table( - problem.model, - problem.condition_df, - problem.parameter_df, - problem.mapping_df, + if problem.model is None: + return None + + allowed_in_condition_cols = set( + problem.model.get_valid_ids_for_condition_table() + ) + allowed_in_condition_cols |= { + m.petab_id + for m in problem.mapping_table.mappings + if not pd.isna(m.model_id) + and ( + # mapping table entities mapping to already allowed parameters + m.model_id in allowed_in_condition_cols + # mapping table entities mapping to species + or problem.model.is_state_variable(m.model_id) ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + } + + allowed_in_parameter_table = get_valid_parameters_for_parameter_table( + problem + ) + + entities_in_condition_table = { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } + entities_in_parameter_table = { + p.id for p in problem.parameter_table.parameters + } + + disallowed_in_condition = { + x + for x in (entities_in_condition_table - allowed_in_condition_cols) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + if disallowed_in_condition: + is_or_are = "is" if len(disallowed_in_condition) == 1 else "are" + return ValidationError( + f"{disallowed_in_condition} {is_or_are} not " + "allowed to occur in condition table " + "columns." + ) + + disallowed_in_parameters = { + x + for x in (entities_in_parameter_table - allowed_in_parameter_table) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + + if disallowed_in_parameters: + is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are" + return ValidationError( + f"{disallowed_in_parameters} {is_or_are} not " + "allowed to occur in the parameters table." + ) + + in_both = entities_in_condition_table & entities_in_parameter_table + if in_both: + is_or_are = "is" if len(in_both) == 1 else "are" + return ValidationError( + f"{in_both} {is_or_are} present in both " + "the condition table and the parameter table." + ) + + +class CheckUnusedExperiments(ValidationTask): + """A task to check for experiments that are not used in the measurements + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_experiments = { + m.experiment_id + for m in problem.measurement_table.measurements + if m.experiment_id is not None + } + available_experiments = { + e.id for e in problem.experiment_table.experiments + } + + unused_experiments = available_experiments - used_experiments + if unused_experiments: + return ValidationWarning( + f"Experiments {unused_experiments} are not used in the " + "measurements table." + ) + + +class CheckUnusedConditions(ValidationTask): + """A task to check for conditions that are not used in the experiments + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_conditions = { + p.condition_id + for e in problem.experiment_table.experiments + for p in e.periods + if p.condition_id is not None + } + available_conditions = { + c.id for c in problem.condition_table.conditions + } + + unused_conditions = available_conditions - used_conditions + if unused_conditions: + return ValidationWarning( + f"Conditions {unused_conditions} are not used in the " + "experiments table." ) @@ -638,7 +669,9 @@ class CheckVisualizationTable(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: if problem.visualization_df is None: - return + return None + + from ..v1.visualize.lint import validate_visualization_df if validate_visualization_df(problem): return ValidationIssue( @@ -651,14 +684,9 @@ def get_valid_parameters_for_parameter_table( problem: Problem, ) -> set[str]: """ - Get set of parameters which may be present inside the parameter table + Get the set of parameters which may be present inside the parameter table - Arguments: - model: PEtab model - condition_df: PEtab condition table - observable_df: PEtab observable table - measurement_df: PEtab measurement table - mapping_df: PEtab mapping table for additional checks + :param problem: The PEtab problem Returns: Set of parameter IDs which PEtab allows to be present in the @@ -672,71 +700,50 @@ def get_valid_parameters_for_parameter_table( # - remove parameters for which condition table columns exist # - remove placeholder parameters # (only partial overrides are not supported) - model = problem.model - condition_df = problem.condition_df - observable_df = problem.observable_df - measurement_df = problem.measurement_df - mapping_df = problem.mapping_df # must not go into parameter table - blackset = set() + invalid = set(get_placeholders(problem)) - if observable_df is not None: - placeholders = set(get_placeholders(observable_df)) - - # collect assignment targets - blackset |= placeholders - - if condition_df is not None: - blackset |= set(condition_df.columns.values) - {CONDITION_NAME} + # condition table targets + invalid |= { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } # don't use sets here, to have deterministic ordering, - # e.g. for creating parameter tables + # e.g., for creating parameter tables parameter_ids = OrderedDict.fromkeys( p - for p in model.get_valid_parameters_for_parameter_table() - if p not in blackset + for p in problem.model.get_valid_parameters_for_parameter_table() + if p not in invalid ) - if mapping_df is not None: - for from_id, to_id in mapping_df[MODEL_ENTITY_ID].items(): - if to_id in parameter_ids.keys(): - parameter_ids[from_id] = None + for mapping in problem.mapping_table.mappings: + if mapping.model_id and mapping.model_id in parameter_ids.keys(): + parameter_ids[mapping.petab_id] = None - if observable_df is not None: - # add output parameters from observables table - output_parameters = get_output_parameters( - observable_df=observable_df, model=model - ) - for p in output_parameters: - if p not in blackset: - parameter_ids[p] = None + # add output parameters from observables table + output_parameters = get_output_parameters(problem) + for p in output_parameters: + if p not in invalid: + parameter_ids[p] = None # Append parameters from measurement table, unless they occur as condition # table columns def append_overrides(overrides): for p in overrides: - if isinstance(p, str) and p not in blackset: - parameter_ids[p] = None - - if measurement_df is not None: - for _, row in measurement_df.iterrows(): - # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None) - ) - ) + if isinstance(p, sp.Symbol) and (str_p := str(p)) not in invalid: + parameter_ids[str_p] = None + + for measurement in problem.measurement_table.measurements: + # we trust that the number of overrides matches + append_overrides(measurement.observable_parameters) + append_overrides(measurement.noise_parameters) # Append parameter overrides from condition table - if condition_df is not None: - for p in v2.conditions.get_condition_table_free_symbols(problem): - parameter_ids[str(p)] = None + for p in problem.condition_table.free_symbols: + parameter_ids[str(p)] = None return set(parameter_ids.keys()) @@ -756,34 +763,30 @@ def get_required_parameters_for_parameter_table( that are not defined in the model. """ parameter_ids = set() + condition_targets = { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } # Add parameters from measurement table, unless they are fixed parameters def append_overrides(overrides): parameter_ids.update( - p + str_p for p in overrides - if isinstance(p, str) - and ( - problem.condition_df is None - or p not in problem.condition_df[TARGET_ID] - ) + if isinstance(p, sp.Symbol) + and (str_p := str(p)) not in condition_targets ) - for _, row in problem.measurement_df.iterrows(): + for m in problem.measurement_table.measurements: # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list(row.get(NOISE_PARAMETERS, None)) - ) + append_overrides(m.observable_parameters) + append_overrides(m.noise_parameters) - # remove `observable_ids` when - # `get_output_parameters` is updated for PEtab v2/v1.1, where - # observable IDs are allowed in observable formulae - observable_ids = set(problem.observable_df.index) + # TODO remove `observable_ids` when + # `get_output_parameters` is updated for PEtab v2/v1.1, where + # observable IDs are allowed in observable formulae + observable_ids = {o.id for o in problem.observable_table.observables} # Add output parameters except for placeholders for formula_type, placeholder_sources in ( @@ -801,13 +804,11 @@ def append_overrides(overrides): ), ): output_parameters = get_output_parameters( - problem.observable_df, - problem.model, - mapping_df=problem.mapping_df, + problem, **formula_type, ) placeholders = get_placeholders( - problem.observable_df, + problem, **placeholder_sources, ) parameter_ids.update( @@ -820,35 +821,119 @@ def append_overrides(overrides): # model parameter_ids.update( str(p) - for p in v2.conditions.get_condition_table_free_symbols(problem) + for p in problem.condition_table.free_symbols if not problem.model.has_entity_with_id(str(p)) ) # parameters that are overridden via the condition table are not allowed - if problem.condition_df is not None: - parameter_ids -= set(problem.condition_df[TARGET_ID].unique()) + parameter_ids -= condition_targets return parameter_ids +def get_output_parameters( + problem: Problem, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get output parameters + + Returns IDs of parameters used in observable and noise formulas that are + not defined in the model. + + Arguments: + problem: The PEtab problem + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of output parameter IDs + """ + formulas = [] + if observables: + formulas.extend( + o.formula for o in problem.observable_table.observables + ) + if noise: + formulas.extend( + o.noise_formula for o in problem.observable_table.observables + ) + output_parameters = OrderedDict() + + for formula in formulas: + free_syms = sorted( + formula.free_symbols, + key=lambda symbol: symbol.name, + ) + for free_sym in free_syms: + sym = str(free_sym) + if problem.model.symbol_allowed_in_observable_formula(sym): + continue + + # does it map to a model entity? + + if ( + (mapped := problem.mapping_table.get(sym)) is not None + and mapped.model_id is not None + and problem.model.symbol_allowed_in_observable_formula( + mapped.model_id + ) + ): + continue + + output_parameters[sym] = None + + return list(output_parameters.keys()) + + +def get_placeholders( + problem: Problem, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get all placeholder parameters from observable table observableFormulas + and noiseFormulas. + + Arguments: + problem: The PEtab problem + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of placeholder parameters from observable table observableFormulas + and noiseFormulas. + """ + # collect placeholder parameters overwritten by + # {observable,noise}Parameters + placeholders = [] + for o in problem.observable_table.observables: + if observables: + placeholders.extend(map(str, o.observable_placeholders)) + if noise: + placeholders.extend(map(str, o.noise_placeholders)) + + from ..v1.core import unique_preserve_order + + return unique_preserve_order(placeholders) + + #: Validation tasks that should be run on any PEtab problem default_validation_tasks = [ - CheckTableExists("measurement"), - CheckTableExists("observable"), - CheckTableExists("parameter"), + CheckProblemConfig(), CheckModel(), - CheckMeasurementTable(), - CheckConditionTable(), + CheckUniquePrimaryKeys(), + CheckMeasuredObservablesDefined(), + CheckPosLogMeasurements(), + CheckOverridesMatchPlaceholders(), + CheckValidConditionTargets(), CheckExperimentTable(), - CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True), - CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), - CheckValidPetabIdColumn("experiment", CONDITION_ID), CheckExperimentConditionsExist(), - CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), - CheckParameterTable(), CheckAllParametersPresentInParameterTable(), + CheckValidParameterInConditionOrParameterTable(), + CheckUnusedExperiments(), + CheckUnusedConditions(), # TODO: atomize checks, update to long condition table, re-enable # CheckVisualizationTable(), - CheckValidParameterInConditionOrParameterTable(), + # TODO validate mapping table ] diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 7f675db0..d7b6fb68 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,9 +1,12 @@ """Convert PEtab version 1 problems to version 2.""" +from __future__ import annotations + import shutil from contextlib import suppress from itertools import chain from pathlib import Path +from tempfile import TemporaryDirectory from urllib.parse import urlparse from uuid import uuid4 @@ -14,36 +17,48 @@ from ..v1.yaml import get_path_prefix, load_yaml, validate from ..versions import get_major_version from .models import MODEL_TYPE_SBML -from .problem import ProblemConfig __all__ = ["petab1to2"] -def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): +def petab1to2( + yaml_config: Path | str, output_dir: Path | str = None +) -> v2.Problem | None: """Convert from PEtab 1.0 to PEtab 2.0 format. Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format. - Parameters - ---------- - yaml_config: dict | Path | str + :param yaml_config: The PEtab problem as dictionary or YAML file name. - output_dir: Path | str + :param output_dir: The output directory to save the converted PEtab problem, or ``None``, to return a :class:`petab.v2.Problem` instance. - Raises - ------ - ValueError + :raises ValueError: If the input is invalid or does not pass linting or if the generated files do not pass linting. """ - if output_dir is None: - # TODO requires petab.v2.Problem - raise NotImplementedError("Not implemented yet.") - elif isinstance(yaml_config, dict): - raise ValueError("If output_dir is given, yaml_config must be a file.") + if output_dir is not None: + return petab_files_1to2(yaml_config, output_dir) + + with TemporaryDirectory() as tmp_dir: + petab_files_1to2(yaml_config, tmp_dir) + return v2.Problem.from_yaml(Path(tmp_dir, Path(yaml_config).name)) + + +def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): + """Convert PEtab files from PEtab 1.0 to PEtab 2.0. + + + :param yaml_config: + The PEtab problem as dictionary or YAML file name. + :param output_dir: + The output directory to save the converted PEtab problem. + :raises ValueError: + If the input is invalid or does not pass linting or if the generated + files do not pass linting. + """ if isinstance(yaml_config, Path | str): yaml_file = str(yaml_config) path_prefix = get_path_prefix(yaml_file) @@ -56,11 +71,12 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): get_dest_path = lambda filename: f"{output_dir}/{filename}" # noqa: E731 - # Validate original PEtab problem + # Validate the original PEtab problem validate(yaml_config, path_prefix=path_prefix) if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") petab_problem = v1.Problem.from_yaml(yaml_file or yaml_config) + # TODO: move to mapping table # get rid of conditionName column if present (unsupported in v2) petab_problem.condition_df = petab_problem.condition_df.drop( columns=[v1.C.CONDITION_NAME], errors="ignore" @@ -72,7 +88,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): # Update YAML file new_yaml_config = _update_yaml(yaml_config) - new_yaml_config = ProblemConfig(**new_yaml_config) + new_yaml_config = v2.ProblemConfig(**new_yaml_config) # Update tables # condition tables, observable tables, SBML files, parameter table: @@ -218,7 +234,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: measurement_df, get_dest_path(measurement_file) ) - # Write new YAML file + # Write the new YAML file new_yaml_file = output_dir / Path(yaml_file).name new_yaml_config.to_yaml(new_yaml_file) @@ -226,10 +242,19 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: validation_issues = v2.lint_problem(new_yaml_file) if validation_issues: - raise ValueError( - "Generated PEtab v2 problem did not pass linting: " - f"{validation_issues}" + sev = v2.lint.ValidationIssueSeverity + validation_issues.log(max_level=sev.WARNING) + errors = "\n".join( + map( + str, + (i for i in validation_issues if i.level > sev.WARNING), + ) ) + if errors: + raise ValueError( + "The generated PEtab v2 problem did not pass linting: " + f"{errors}" + ) def _update_yaml(yaml_config: dict) -> dict: @@ -287,7 +312,7 @@ def v1v2_condition_df( condition_df = condition_df.copy().reset_index() with suppress(KeyError): # conditionName was dropped in PEtab v2 - condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True) + condition_df.drop(columns=[v1.C.CONDITION_NAME], inplace=True) condition_df = condition_df.melt( id_vars=[v1.C.CONDITION_ID], @@ -301,24 +326,8 @@ def v1v2_condition_df( columns=[ v2.C.CONDITION_ID, v2.C.TARGET_ID, - v2.C.OPERATION_TYPE, v2.C.TARGET_VALUE, ] ) - targets = set(condition_df[v2.C.TARGET_ID].unique()) - valid_cond_pars = set(model.get_valid_parameters_for_parameter_table()) - # entities to which we assign constant values - constant = targets & valid_cond_pars - # entities to which we assign initial values - initial = set() - for target in targets - constant: - if model.is_state_variable(target): - initial.add(target) - else: - raise NotImplementedError( - f"Unable to determine value type {target} in the condition " - "table." - ) - condition_df[v2.C.OPERATION_TYPE] = v2.C.OT_CUR_VAL return condition_df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index d18b4b7c..2b564e0c 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -13,10 +13,10 @@ from typing import TYPE_CHECKING import pandas as pd +import sympy as sp from pydantic import AnyUrl, BaseModel, Field from ..v1 import ( - core, mapping, measurements, observables, @@ -25,17 +25,18 @@ sampling, yaml, ) +from ..v1.core import concat_tables, get_visualization_df from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 from ..versions import parse_version -from . import conditions, experiments +from . import conditions, core, experiments if TYPE_CHECKING: from ..v2.lint import ValidationResultList, ValidationTask -__all__ = ["Problem"] +__all__ = ["Problem", "ProblemConfig"] class Problem: @@ -49,135 +50,102 @@ class Problem: - experiment table - measurement table - parameter table - - observables table + - observable table - mapping table Optionally, it may contain visualization tables. See also :doc:`petab:v2/documentation_data_format`. - - Parameters: - condition_df: PEtab condition table - experiment_df: PEtab experiment table - measurement_df: PEtab measurement table - parameter_df: PEtab parameter table - observable_df: PEtab observable table - visualization_df: PEtab visualization table - mapping_df: PEtab mapping table - model: The underlying model - extensions_config: Information on the extensions used """ def __init__( self, model: Model = None, - condition_df: pd.DataFrame = None, - experiment_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, + condition_table: core.ConditionTable = None, + experiment_table: core.ExperimentTable = None, + observable_table: core.ObservableTable = None, + measurement_table: core.MeasurementTable = None, + parameter_table: core.ParameterTable = None, + mapping_table: core.MappingTable = None, visualization_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, - extensions_config: dict = None, config: ProblemConfig = None, ): from ..v2.lint import default_validation_tasks - self.condition_df: pd.DataFrame | None = condition_df - self.experiment_df: pd.DataFrame | None = experiment_df - self.measurement_df: pd.DataFrame | None = measurement_df - self.parameter_df: pd.DataFrame | None = parameter_df - self.visualization_df: pd.DataFrame | None = visualization_df - self.observable_df: pd.DataFrame | None = observable_df - self.mapping_df: pd.DataFrame | None = mapping_df + self.config = config self.model: Model | None = model - self.extensions_config = extensions_config or {} self.validation_tasks: list[ValidationTask] = ( default_validation_tasks.copy() ) - self.config = config - from .core import ( - ChangeSet, - ConditionsTable, - Experiment, - ExperimentsTable, - MappingTable, - MeasurementTable, - Observable, - ObservablesTable, - ParameterTable, + self.observable_table = observable_table or core.ObservableTable( + observables=[] ) - - self.observables_table: ObservablesTable = ObservablesTable.from_df( - self.observable_df + self.condition_table = condition_table or core.ConditionTable( + conditions=[] ) - self.observables: list[Observable] = self.observables_table.observables - - self.conditions_table: ConditionsTable = ConditionsTable.from_df( - self.condition_df + self.experiment_table = experiment_table or core.ExperimentTable( + experiments=[] ) - self.conditions: list[ChangeSet] = self.conditions_table.conditions - - self.experiments_table: ExperimentsTable = ExperimentsTable.from_df( - self.experiment_df + self.measurement_table = measurement_table or core.MeasurementTable( + measurements=[] ) - self.experiments: list[Experiment] = self.experiments_table.experiments - - self.measurement_table: MeasurementTable = MeasurementTable.from_df( - self.measurement_df, + self.mapping_table = mapping_table or core.MappingTable(mappings=[]) + self.parameter_table = parameter_table or core.ParameterTable( + parameters=[] ) - self.mapping_table: MappingTable = MappingTable.from_df( - self.mapping_df - ) - self.parameter_table: ParameterTable = ParameterTable.from_df( - self.parameter_df - ) - # TODO: visualization table + self.visualization_df = visualization_df def __str__(self): model = f"with model ({self.model})" if self.model else "without model" - experiments = ( - f"{self.experiment_df.shape[0]} experiments" - if self.experiment_df is not None - else "without experiments table" - ) + ne = len(self.experiment_table.experiments) + experiments = f"{ne} experiments" - conditions = ( - f"{self.condition_df.shape[0]} conditions" - if self.condition_df is not None - else "without conditions table" - ) + nc = len(self.condition_table.conditions) + conditions = f"{nc} conditions" - observables = ( - f"{self.observable_df.shape[0]} observables" - if self.observable_df is not None - else "without observables table" - ) + no = len(self.observable_table.observables) + observables = f"{no} observables" - measurements = ( - f"{self.measurement_df.shape[0]} measurements" - if self.measurement_df is not None - else "without measurements table" - ) + nm = len(self.measurement_table.measurements) + measurements = f"{nm} measurements" - if self.parameter_df is not None: - num_estimated_parameters = ( - sum(self.parameter_df[ESTIMATE] == 1) - if ESTIMATE in self.parameter_df - else self.parameter_df.shape[0] - ) - parameters = f"{num_estimated_parameters} estimated parameters" - else: - parameters = "without parameter_df table" + nest = self.parameter_table.n_estimated + parameters = f"{nest} estimated parameters" return ( f"PEtab Problem {model}, {conditions}, {experiments}, " f"{observables}, {measurements}, {parameters}" ) + def __getitem__(self, key): + """Get PEtab entity by ID. + + This allows accessing PEtab entities such as conditions, experiments, + observables, and parameters by their ID. + + Accessing model entities is not currently not supported. + """ + for table in ( + self.condition_table, + self.experiment_table, + self.observable_table, + self.measurement_table, + self.parameter_table, + self.mapping_table, + ): + if table is not None: + try: + return table[key] + except KeyError: + pass + + raise KeyError( + f"Entity with ID '{key}' not found in the PEtab problem" + ) + @staticmethod def from_yaml( yaml_config: dict | Path | str, base_path: str | Path = None @@ -235,10 +203,10 @@ def get_path(filename): if yaml.is_composite_problem(yaml_config): raise ValueError( - "petab.Problem.from_yaml() can only be used for " + "petab.v2.Problem.from_yaml() can only be used for " "yaml files comprising a single model. " "Consider using " - "petab.CompositeProblem.from_yaml() instead." + "petab.v2.CompositeProblem.from_yaml() instead." ) config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file @@ -273,9 +241,7 @@ def get_path(filename): measurement_files = [get_path(f) for f in problem0.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( - core.concat_tables( - measurement_files, measurements.get_measurement_df - ) + concat_tables(measurement_files, measurements.get_measurement_df) if measurement_files else None ) @@ -283,7 +249,7 @@ def get_path(filename): condition_files = [get_path(f) for f in problem0.condition_files] # If there are multiple tables, we will merge them condition_df = ( - core.concat_tables(condition_files, conditions.get_condition_df) + concat_tables(condition_files, conditions.get_condition_df) if condition_files else None ) @@ -291,7 +257,7 @@ def get_path(filename): experiment_files = [get_path(f) for f in problem0.experiment_files] # If there are multiple tables, we will merge them experiment_df = ( - core.concat_tables(experiment_files, experiments.get_experiment_df) + concat_tables(experiment_files, experiments.get_experiment_df) if experiment_files else None ) @@ -301,7 +267,7 @@ def get_path(filename): ] # If there are multiple tables, we will merge them visualization_df = ( - core.concat_tables(visualization_files, core.get_visualization_df) + concat_tables(visualization_files, get_visualization_df) if visualization_files else None ) @@ -309,7 +275,7 @@ def get_path(filename): observable_files = [get_path(f) for f in problem0.observable_files] # If there are multiple tables, we will merge them observable_df = ( - core.concat_tables(observable_files, observables.get_observable_df) + concat_tables(observable_files, observables.get_observable_df) if observable_files else None ) @@ -317,12 +283,12 @@ def get_path(filename): mapping_files = [get_path(f) for f in problem0.mapping_files] # If there are multiple tables, we will merge them mapping_df = ( - core.concat_tables(mapping_files, mapping.get_mapping_df) + concat_tables(mapping_files, mapping.get_mapping_df) if mapping_files else None ) - return Problem( + return Problem.from_dfs( condition_df=condition_df, experiment_df=experiment_df, measurement_df=measurement_df, @@ -331,20 +297,66 @@ def get_path(filename): model=model, visualization_df=visualization_df, mapping_df=mapping_df, - extensions_config=config.extensions, + config=config, + ) + + @staticmethod + def from_dfs( + model: Model = None, + condition_df: pd.DataFrame = None, + experiment_df: pd.DataFrame = None, + measurement_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + visualization_df: pd.DataFrame = None, + observable_df: pd.DataFrame = None, + mapping_df: pd.DataFrame = None, + config: ProblemConfig = None, + ): + """ + Construct a PEtab problem from dataframes. + + Parameters: + condition_df: PEtab condition table + experiment_df: PEtab experiment table + measurement_df: PEtab measurement table + parameter_df: PEtab parameter table + observable_df: PEtab observable table + visualization_df: PEtab visualization table + mapping_df: PEtab mapping table + model: The underlying model + config: The PEtab problem configuration + """ + + observable_table = core.ObservableTable.from_df(observable_df) + condition_table = core.ConditionTable.from_df(condition_df) + experiment_table = core.ExperimentTable.from_df(experiment_df) + measurement_table = core.MeasurementTable.from_df(measurement_df) + mapping_table = core.MappingTable.from_df(mapping_df) + parameter_table = core.ParameterTable.from_df(parameter_df) + + return Problem( + model=model, + condition_table=condition_table, + experiment_table=experiment_table, + observable_table=observable_table, + measurement_table=measurement_table, + parameter_table=parameter_table, + mapping_table=mapping_table, + visualization_df=visualization_df, + config=config, ) @staticmethod def from_combine(filename: Path | str) -> Problem: """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). - See also :py:func:`petab.create_combine_archive`. + See also :py:func:`petab.v2.create_combine_archive`. Arguments: filename: Path to the PEtab-COMBINE archive Returns: - A :py:class:`petab.Problem` instance. + A :py:class:`petab.v2.Problem` instance. """ # function-level import, because module-level import interfered with # other SWIG interfaces @@ -390,13 +402,75 @@ def get_problem(problem: str | Path | Problem) -> Problem: "or a PEtab problem object." ) + @property + def condition_df(self) -> pd.DataFrame | None: + """Condition table as DataFrame.""" + # TODO: return empty df? + return self.condition_table.to_df() if self.condition_table else None + + @condition_df.setter + def condition_df(self, value: pd.DataFrame): + self.condition_table = core.ConditionTable.from_df(value) + + @property + def experiment_df(self) -> pd.DataFrame | None: + """Experiment table as DataFrame.""" + return self.experiment_table.to_df() if self.experiment_table else None + + @experiment_df.setter + def experiment_df(self, value: pd.DataFrame): + self.experiment_table = core.ExperimentTable.from_df(value) + + @property + def measurement_df(self) -> pd.DataFrame | None: + """Measurement table as DataFrame.""" + return ( + self.measurement_table.to_df() if self.measurement_table else None + ) + + @measurement_df.setter + def measurement_df(self, value: pd.DataFrame): + self.measurement_table = core.MeasurementTable.from_df(value) + + @property + def parameter_df(self) -> pd.DataFrame | None: + """Parameter table as DataFrame.""" + return self.parameter_table.to_df() if self.parameter_table else None + + @parameter_df.setter + def parameter_df(self, value: pd.DataFrame): + self.parameter_table = core.ParameterTable.from_df(value) + + @property + def observable_df(self) -> pd.DataFrame | None: + """Observable table as DataFrame.""" + return self.observable_table.to_df() if self.observable_table else None + + @observable_df.setter + def observable_df(self, value: pd.DataFrame): + self.observable_table = core.ObservableTable.from_df(value) + + @property + def mapping_df(self) -> pd.DataFrame | None: + """Mapping table as DataFrame.""" + return self.mapping_table.to_df() if self.mapping_table else None + + @mapping_df.setter + def mapping_df(self, value: pd.DataFrame): + self.mapping_table = core.MappingTable.from_df(value) + def get_optimization_parameters(self) -> list[str]: """ - Return list of optimization parameter IDs. + Get the list of optimization parameter IDs from parameter table. - See :py:func:`petab.parameters.get_optimization_parameters`. + Arguments: + parameter_df: PEtab parameter DataFrame + + Returns: + A list of IDs of parameters selected for optimization + (i.e., those with estimate = True). """ - return parameters.get_optimization_parameters(self.parameter_df) + return [p.id for p in self.parameter_table.parameters if p.estimate] def get_optimization_parameter_scales(self) -> dict[str, str]: """ @@ -404,13 +478,14 @@ def get_optimization_parameter_scales(self) -> dict[str, str]: See :py:func:`petab.parameters.get_optimization_parameters`. """ + # TODO: to be removed in v2? return parameters.get_optimization_parameter_scaling(self.parameter_df) def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. """ - return list(self.observable_df.index) + return [o.id for o in self.observable_table.observables] def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): """Apply mask of only free or only fixed values. @@ -420,9 +495,9 @@ def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): v: The full vector the mask is to be applied to. free: - Whether to return free parameters, i.e. parameters to estimate. + Whether to return free parameters, i.e., parameters to estimate. fixed: - Whether to return fixed parameters, i.e. parameters not to + Whether to return fixed parameters, i.e., parameters not to estimate. Returns @@ -452,7 +527,7 @@ def get_x_ids(self, free: bool = True, fixed: bool = True): ------- The parameter IDs. """ - v = list(self.parameter_df.index.values) + v = [p.id for p in self.parameter_table.parameters] return self._apply_mask(v, free=free, fixed=fixed) @property @@ -472,7 +547,7 @@ def x_fixed_ids(self) -> list[str]: def get_x_nominal( self, free: bool = True, fixed: bool = True, scaled: bool = False - ): + ) -> list: """Generic function to get parameter nominal values. Parameters @@ -490,10 +565,10 @@ def get_x_nominal( ------- The parameter nominal values. """ - if NOMINAL_VALUE in self.parameter_df: - v = list(self.parameter_df[NOMINAL_VALUE]) - else: - v = [nan] * len(self.parameter_df) + v = [ + p.nominal_value if p.nominal_value is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( @@ -555,7 +630,10 @@ def get_lb( ------- The lower parameter bounds. """ - v = list(self.parameter_df[LOWER_BOUND]) + v = [ + p.lb if p.lb is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -592,7 +670,10 @@ def get_ub( ------- The upper parameter bounds. """ - v = list(self.parameter_df[UPPER_BOUND]) + v = [ + p.ub if p.ub is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -612,19 +693,22 @@ def ub_scaled(self) -> list: @property def x_free_indices(self) -> list[int]: """Parameter table estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val != 0] + return [ + i + for i, p in enumerate(self.parameter_table.parameters) + if p.estimate + ] @property def x_fixed_indices(self) -> list[int]: """Parameter table non-estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val == 0] - - def get_simulation_conditions_from_measurement_df(self) -> pd.DataFrame: - """See :func:`petab.get_simulation_conditions`.""" - return measurements.get_simulation_conditions(self.measurement_df) + return [ + i + for i, p in enumerate(self.parameter_table.parameters) + if not p.estimate + ] + # TODO remove in v2? def get_optimization_to_simulation_parameter_mapping(self, **kwargs): """ See @@ -669,6 +753,7 @@ def sample_parameter_startpoints_dict( ) ] + # TODO: remove in v2? def unscale_parameters( self, x_dict: dict[str, float], @@ -693,6 +778,7 @@ def unscale_parameters( for parameter_id, parameter_value in x_dict.items() } + # TODO: remove in v2? def scale_parameters( self, x_dict: dict[str, float], @@ -725,8 +811,9 @@ def n_estimated(self) -> int: @property def n_measurements(self) -> int: """Number of measurements.""" - return self.measurement_df[MEASUREMENT].notna().sum() + return len(self.measurement_table.measurements) + # TODO: update after implementing priors in `Parameter` @property def n_priors(self) -> int: """Number of priors.""" @@ -753,13 +840,14 @@ def validate( ) validation_results = ValidationResultList() - if self.extensions_config: + if self.config.extensions: + extensions = ",".join(e.name for e in self.config.extensions) validation_results.append( ValidationIssue( ValidationIssueSeverity.WARNING, "Validation of PEtab extensions is not yet implemented, " "but the given problem uses the following extensions: " - f"{'', ''.join(self.extensions_config.keys())}", + f"{extensions}", ) ) @@ -782,7 +870,7 @@ def validate( return validation_results def add_condition( - self, id_: str, name: str = None, **kwargs: tuple[str, Number | str] + self, id_: str, name: str = None, **kwargs: Number | str | sp.Expr ): """Add a simulation condition to the problem. @@ -790,29 +878,25 @@ def add_condition( id_: The condition id name: The condition name kwargs: Entities to be added to the condition table in the form - `target_id=(value_type, target_value)`. + `target_id=target_value`. """ if not kwargs: - return - records = [ - { - CONDITION_ID: id_, - TARGET_ID: target_id, - OPERATION_TYPE: op_type, - TARGET_VALUE: target_value, - } - for target_id, (op_type, target_value) in kwargs.items() + raise ValueError("Cannot add condition without any changes") + + changes = [ + core.Change(target_id=target_id, target_value=target_value) + for target_id, target_value in kwargs.items() ] - # TODO: is the condition name supported in v2? - if name is not None: - for record in records: - record[CONDITION_NAME] = [name] - tmp_df = pd.DataFrame(records) - self.condition_df = ( - pd.concat([self.condition_df, tmp_df], ignore_index=True) - if self.condition_df is not None - else tmp_df + self.condition_table.conditions.append( + core.Condition(id=id_, changes=changes) ) + if name is not None: + self.mapping_table.mappings.append( + core.Mapping( + petab_id=id_, + name=name, + ) + ) def add_observable( self, @@ -837,25 +921,20 @@ def add_observable( """ record = { - OBSERVABLE_ID: [id_], - OBSERVABLE_FORMULA: [formula], + OBSERVABLE_ID: id_, + OBSERVABLE_FORMULA: formula, } if name is not None: - record[OBSERVABLE_NAME] = [name] + record[OBSERVABLE_NAME] = name if noise_formula is not None: - record[NOISE_FORMULA] = [noise_formula] + record[NOISE_FORMULA] = noise_formula if noise_distribution is not None: - record[NOISE_DISTRIBUTION] = [noise_distribution] + record[NOISE_DISTRIBUTION] = noise_distribution if transform is not None: - record[OBSERVABLE_TRANSFORMATION] = [transform] + record[OBSERVABLE_TRANSFORMATION] = transform record.update(kwargs) - tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID]) - self.observable_df = ( - pd.concat([self.observable_df, tmp_df]) - if self.observable_df is not None - else tmp_df - ) + self.observable_table += core.Observable(**record) def add_parameter( self, @@ -888,42 +967,37 @@ def add_parameter( kwargs: additional columns/values to add to the parameter table """ record = { - PARAMETER_ID: [id_], + PARAMETER_ID: id_, } if estimate is not None: - record[ESTIMATE] = [int(estimate)] + record[ESTIMATE] = estimate if nominal_value is not None: - record[NOMINAL_VALUE] = [nominal_value] + record[NOMINAL_VALUE] = nominal_value if scale is not None: - record[PARAMETER_SCALE] = [scale] + record[PARAMETER_SCALE] = scale if lb is not None: - record[LOWER_BOUND] = [lb] + record[LOWER_BOUND] = lb if ub is not None: - record[UPPER_BOUND] = [ub] + record[UPPER_BOUND] = ub if init_prior_type is not None: - record[INITIALIZATION_PRIOR_TYPE] = [init_prior_type] + record[INITIALIZATION_PRIOR_TYPE] = init_prior_type if init_prior_pars is not None: if not isinstance(init_prior_pars, str): init_prior_pars = PARAMETER_SEPARATOR.join( map(str, init_prior_pars) ) - record[INITIALIZATION_PRIOR_PARAMETERS] = [init_prior_pars] + record[INITIALIZATION_PRIOR_PARAMETERS] = init_prior_pars if obj_prior_type is not None: - record[OBJECTIVE_PRIOR_TYPE] = [obj_prior_type] + record[OBJECTIVE_PRIOR_TYPE] = obj_prior_type if obj_prior_pars is not None: if not isinstance(obj_prior_pars, str): obj_prior_pars = PARAMETER_SEPARATOR.join( map(str, obj_prior_pars) ) - record[OBJECTIVE_PRIOR_PARAMETERS] = [obj_prior_pars] + record[OBJECTIVE_PRIOR_PARAMETERS] = obj_prior_pars record.update(kwargs) - tmp_df = pd.DataFrame(record).set_index([PARAMETER_ID]) - self.parameter_df = ( - pd.concat([self.parameter_df, tmp_df]) - if self.parameter_df is not None - else tmp_df - ) + self.parameter_table += core.Parameter(**record) def add_measurement( self, @@ -931,8 +1005,8 @@ def add_measurement( experiment_id: str, time: float, measurement: float, - observable_parameters: Sequence[str | float] = None, - noise_parameters: Sequence[str | float] = None, + observable_parameters: Sequence[str | float] | str | float = None, + noise_parameters: Sequence[str | float] | str | float = None, ): """Add a measurement to the problem. @@ -944,44 +1018,35 @@ def add_measurement( observable_parameters: The observable parameters noise_parameters: The noise parameters """ - record = { - OBSERVABLE_ID: [obs_id], - EXPERIMENT_ID: [experiment_id], - TIME: [time], - MEASUREMENT: [measurement], - } - if observable_parameters is not None: - record[OBSERVABLE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(map(str, observable_parameters)) - ] - if noise_parameters is not None: - record[NOISE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(map(str, noise_parameters)) - ] - - tmp_df = pd.DataFrame(record) - self.measurement_df = ( - pd.concat([self.measurement_df, tmp_df]) - if self.measurement_df is not None - else tmp_df - ).reset_index(drop=True) - - def add_mapping(self, petab_id: str, model_id: str): + if observable_parameters is not None and not isinstance( + observable_parameters, Sequence + ): + observable_parameters = [observable_parameters] + if noise_parameters is not None and not isinstance( + noise_parameters, Sequence + ): + noise_parameters = [noise_parameters] + + self.measurement_table.measurements.append( + core.Measurement( + observable_id=obs_id, + experiment_id=experiment_id, + time=time, + measurement=measurement, + observable_parameters=observable_parameters, + noise_parameters=noise_parameters, + ) + ) + + def add_mapping(self, petab_id: str, model_id: str, name: str = None): """Add a mapping table entry to the problem. Arguments: petab_id: The new PEtab-compatible ID mapping to `model_id` model_id: The ID of some entity in the model """ - record = { - PETAB_ENTITY_ID: [petab_id], - MODEL_ENTITY_ID: [model_id], - } - tmp_df = pd.DataFrame(record).set_index([PETAB_ENTITY_ID]) - self.mapping_df = ( - pd.concat([self.mapping_df, tmp_df]) - if self.mapping_df is not None - else tmp_df + self.mapping_table.mappings.append( + core.Mapping(petab_id=petab_id, model_id=model_id, name=name) ) def add_experiment(self, id_: str, *args): @@ -996,22 +1061,41 @@ def add_experiment(self, id_: str, *args): "Arguments must be pairs of timepoints and condition IDs." ) - records = [] - for i in range(0, len(args), 2): - records.append( - { - EXPERIMENT_ID: id_, - TIME: args[i], - CONDITION_ID: args[i + 1], - } - ) - tmp_df = pd.DataFrame(records) - self.experiment_df = ( - pd.concat([self.experiment_df, tmp_df]) - if self.experiment_df is not None - else tmp_df + periods = [ + core.ExperimentPeriod(time=args[i], condition_id=args[i + 1]) + for i in range(0, len(args), 2) + ] + + self.experiment_table.experiments.append( + core.Experiment(id=id_, periods=periods) + ) + + def __iadd__(self, other): + """Add Observable, Parameter, Measurement, Condition, or Experiment""" + from .core import ( + Condition, + Experiment, + Measurement, + Observable, + Parameter, ) + if isinstance(other, Observable): + self.observable_table += other + elif isinstance(other, Parameter): + self.parameter_table += other + elif isinstance(other, Measurement): + self.measurement_table += other + elif isinstance(other, Condition): + self.condition_table += other + elif isinstance(other, Experiment): + self.experiment_table += other + else: + raise ValueError( + f"Cannot add object of type {type(other)} to Problem." + ) + return self + class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" @@ -1044,19 +1128,25 @@ class ExtensionConfig(BaseModel): class ProblemConfig(BaseModel): """The PEtab problem configuration.""" + #: The path to the PEtab problem configuration. filepath: str | AnyUrl | None = Field( None, description="The path to the PEtab problem configuration.", exclude=True, ) + #: The base path to resolve relative paths. base_path: str | AnyUrl | None = Field( None, description="The base path to resolve relative paths.", exclude=True, ) + #: The PEtab format version. format_version: str = "2.0.0" + #: The path to the parameter file, relative to ``base_path``. parameter_file: str | AnyUrl | None = None + #: The list of problems in the configuration. problems: list[SubProblem] = [] + #: Extensiions used by the problem. extensions: list[ExtensionConfig] = [] def to_yaml(self, filename: str | Path): diff --git a/pytest.ini b/pytest.ini index 4aa44158..b5f0c04d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] -addopts = --doctest-modules +addopts = --doctest-modules --durations=0 --durations-min=10 +testpaths = + petab + tests filterwarnings = error # TODO: until tests are reorganized for petab.v1 @@ -8,5 +11,3 @@ filterwarnings = ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning - # TODO: until we have proper v2 support - ignore:The experiment table is not yet supported and will be ignored:UserWarning diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 828aac88..dae78154 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -74,6 +74,11 @@ def test_ids(): """Test symbols in expressions.""" assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True) + # test that sympy expressions that are invalid in PEtab raise an error + # TODO: handle these cases after + # https://github.com/PEtab-dev/libpetab-python/pull/364 + # sympify_petab(sp.Symbol("föö")) + def test_syntax_error(): """Test exceptions upon syntax errors.""" diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index 4b982fcf..612606ab 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -1,21 +1,21 @@ import logging -import tempfile import pytest +from petab.v2 import Problem from petab.v2.petab1to2 import petab1to2 def test_petab1to2_remote(): + """Test that we can upgrade a remote PEtab 1.0.0 problem.""" yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" ) - with tempfile.TemporaryDirectory(prefix="test_petab1to2") as tmpdirname: - # TODO verify that the v2 files match "ground truth" - # in `petabtests/cases/v2.0.0/sbml/0001/_0001.yaml` - petab1to2(yaml_url, tmpdirname) + problem = petab1to2(yaml_url) + assert isinstance(problem, Problem) + assert len(problem.measurement_table.measurements) try: @@ -36,10 +36,10 @@ def test_benchmark_collection(problem_id): logging.basicConfig(level=logging.DEBUG) if problem_id == "Froehlich_CellSystems2018": + # this is mostly about 6M sympifications in the condition table pytest.skip("Too slow. Re-enable once we are faster.") yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) - with tempfile.TemporaryDirectory( - prefix=f"test_petab1to2_{problem_id}" - ) as tmpdirname: - petab1to2(yaml_path, tmpdirname) + problem = petab1to2(yaml_path) + assert isinstance(problem, Problem) + assert len(problem.measurement_table.measurements) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index a7eae851..181f5523 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -1,39 +1,36 @@ import tempfile from pathlib import Path -from petab.v2.core import ( - Change, - ChangeSet, - ConditionsTable, - Experiment, - ExperimentPeriod, - ObservablesTable, - OperationType, -) +import pytest +import sympy as sp +from pydantic import ValidationError +from sympy.abc import x, y + +from petab.v2.core import * from petab.v2.petab1to2 import petab1to2 example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" -def test_observables_table_round_trip(): +def test_observable_table_round_trip(): file = example_dir_fujita / "Fujita_observables.tsv" - observables = ObservablesTable.from_tsv(file) + observables = ObservableTable.from_tsv(file) with tempfile.TemporaryDirectory() as tmp_dir: tmp_file = Path(tmp_dir) / "observables.tsv" observables.to_tsv(tmp_file) - observables2 = ObservablesTable.from_tsv(tmp_file) + observables2 = ObservableTable.from_tsv(tmp_file) assert observables == observables2 -def test_conditions_table_round_trip(): +def test_condition_table_round_trip(): with tempfile.TemporaryDirectory() as tmp_dir: petab1to2(example_dir_fujita / "Fujita.yaml", tmp_dir) file = Path(tmp_dir, "Fujita_experimentalCondition.tsv") - conditions = ConditionsTable.from_tsv(file) + conditions = ConditionTable.from_tsv(file) tmp_file = Path(tmp_dir) / "conditions.tsv" conditions.to_tsv(tmp_file) - conditions2 = ConditionsTable.from_tsv(tmp_file) + conditions2 = ConditionTable.from_tsv(tmp_file) assert conditions == conditions2 @@ -42,9 +39,9 @@ def test_experiment_add_periods(): exp = Experiment(id="exp1") assert exp.periods == [] - p1 = ExperimentPeriod(start=0, condition_id="p1") - p2 = ExperimentPeriod(start=1, condition_id="p2") - p3 = ExperimentPeriod(start=2, condition_id="p3") + p1 = ExperimentPeriod(time=0, condition_id="p1") + p2 = ExperimentPeriod(time=1, condition_id="p2") + p3 = ExperimentPeriod(time=2, condition_id="p3") exp += p1 exp += p2 @@ -55,20 +52,227 @@ def test_experiment_add_periods(): assert exp.periods == [p1, p2] -def test_conditions_table_add_changeset(): - conditions_table = ConditionsTable() - assert conditions_table.conditions == [] +def test_condition_table_add_changes(): + condition_table = ConditionTable() + assert condition_table.conditions == [] - c1 = ChangeSet( + c1 = Condition( id="condition1", - changes=[Change(operation_type=OperationType.NO_CHANGE)], + changes=[Change(target_id="k1", target_value=1)], ) - c2 = ChangeSet( + c2 = Condition( id="condition2", - changes=[Change(operation_type=OperationType.NO_CHANGE)], + changes=[Change(target_id="k2", target_value=sp.sympify("2 * x"))], + ) + + condition_table += c1 + condition_table += c2 + + assert condition_table.conditions == [c1, c2] + + +def test_measurments(): + Measurement( + observable_id="obs1", time=1, experiment_id="exp1", measurement=1 + ) + Measurement( + observable_id="obs1", time="1", experiment_id="exp1", measurement="1" + ) + Measurement( + observable_id="obs1", time="inf", experiment_id="exp1", measurement="1" + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[1], + noise_parameters=[2], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[sp.sympify("x ** y")], + noise_parameters=[sp.sympify("x ** y")], ) - conditions_table += c1 - conditions_table += c2 + assert ( + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + non_petab=1, + ).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got -inf"): + Measurement( + observable_id="obs1", + time="-inf", + experiment_id="exp1", + measurement=1, + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="1_obs", time=1, experiment_id="exp1", measurement=1 + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="obs", time=1, experiment_id=" exp1", measurement=1 + ) + + +def test_observable(): + Observable(id="obs1", formula=x + y) + Observable(id="obs1", formula="x + y", noise_formula="x + y") + Observable(id="obs1", formula=1, noise_formula=2) + Observable( + id="obs1", + formula="x + y", + noise_formula="x + y", + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + Observable( + id="obs1", + formula=sp.sympify("x + y"), + noise_formula=sp.sympify("x + y"), + observable_parameters=[sp.Symbol("p1")], + noise_parameters=[sp.Symbol("n1")], + ) + assert Observable(id="obs1", formula="x + y", non_petab=1).non_petab == 1 + + o = Observable(id="obs1", formula=x + y) + assert o.observable_placeholders == set() + assert o.noise_placeholders == set() + + o = Observable( + id="obs1", + formula="observableParameter1_obs1", + noise_formula="noiseParameter1_obs1", + ) + assert o.observable_placeholders == { + sp.Symbol("observableParameter1_obs1", real=True), + } + assert o.noise_placeholders == { + sp.Symbol("noiseParameter1_obs1", real=True) + } + + # TODO: this should raise an error + # (numbering is not consecutive / not starting from 1) + # TODO: clarify if observableParameter0_obs1 would be allowed + # as regular parameter + # + # with pytest.raises(ValidationError): + # Observable(id="obs1", formula="observableParameter2_obs1") + + +def test_change(): + Change(target_id="k1", target_value=1) + Change(target_id="k1", target_value="x * y") + + assert ( + Change(target_id="k1", target_value=x * y, non_petab="foo").non_petab + == "foo" + ) + with pytest.raises(ValidationError, match="Invalid ID"): + Change(target_id="1_k", target_value=x) + + with pytest.raises(ValidationError, match="input_value=None"): + Change(target_id="k1", target_value=None) + + +def test_period(): + ExperimentPeriod(time=0) + ExperimentPeriod(time=1, condition_id="p1") + ExperimentPeriod(time="-inf", condition_id="p1") + + assert ( + ExperimentPeriod(time="1", condition_id="p1", non_petab=1).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got inf"): + ExperimentPeriod(time="inf", condition_id="p1") + + with pytest.raises(ValidationError, match="Invalid ID"): + ExperimentPeriod(time=1, condition_id="1_condition") + + with pytest.raises(ValidationError, match="type=missing"): + ExperimentPeriod(condition_id="condition") + + +def test_parameter(): + Parameter(id="k1", lb=1, ub=2) + Parameter(id="k1", estimate=False, nominal_value=1) + + assert Parameter(id="k1", lb=1, ub=2, non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Invalid ID"): + Parameter(id="1_k", lb=1, ub=2) + + with pytest.raises(ValidationError, match="upper"): + Parameter(id="k1", lb=1) + + with pytest.raises(ValidationError, match="lower"): + Parameter(id="k1", ub=1) + + with pytest.raises(ValidationError, match="less than"): + Parameter(id="k1", lb=2, ub=1) + + +def test_experiment(): + Experiment(id="experiment1") + Experiment( + id="experiment1", periods=[ExperimentPeriod(time=1, condition_id="c1")] + ) + + assert Experiment(id="experiment1", non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Field required"): + Experiment() + + with pytest.raises(ValidationError, match="Invalid ID"): + Experiment(id="experiment 1") + + +def test_condition_table(): + assert ConditionTable().free_symbols == set() + + assert ( + ConditionTable( + conditions=[ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value="true")], + ) + ] + ).free_symbols + == set() + ) - assert conditions_table.conditions == [c1, c2] + assert ConditionTable( + conditions=[ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value=x / y)], + ) + ] + ).free_symbols == {x, y} diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index db0c402a..33cdb300 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -10,23 +10,14 @@ def test_check_experiments(): """Test ``CheckExperimentTable``.""" problem = Problem() - problem.add_experiment("e1", 0, "c1", 1, "c2") - problem.add_experiment("e2", "-inf", "c1", 1, "c2") - assert problem.experiment_df.shape == (4, 3) check = CheckExperimentTable() assert check.run(problem) is None - assert check.run(Problem()) is None - - tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.loc[0, TIME] = "invalid" - assert check.run(tmp_problem) is not None - - tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.loc[0, TIME] = "inf" - assert check.run(tmp_problem) is not None + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_experiment("e2", "-inf", "c1", 1, "c2") + assert check.run(problem) is None tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.drop(columns=[TIME], inplace=True) + tmp_problem["e1"].periods[0].time = tmp_problem["e1"].periods[1].time assert check.run(tmp_problem) is not None diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 04e394ad..d4eab006 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -12,12 +12,11 @@ ESTIMATE, LOWER_BOUND, MODEL_ENTITY_ID, + NAME, NOISE_FORMULA, NOMINAL_VALUE, OBSERVABLE_FORMULA, OBSERVABLE_ID, - OPERATION_TYPE, - OT_CUR_VAL, PARAMETER_ID, PETAB_ENTITY_ID, TARGET_ID, @@ -73,7 +72,7 @@ def test_problem_from_yaml_multiple_files(): for i in (1, 2): problem = Problem() - problem.add_condition(f"condition{i}", parameter1=(OT_CUR_VAL, i)) + problem.add_condition(f"condition{i}", parameter1=i) petab.write_condition_df( problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) @@ -109,14 +108,13 @@ def test_problem_from_yaml_multiple_files(): def test_modify_problem(): """Test modifying a problem via the API.""" problem = Problem() - problem.add_condition("condition1", parameter1=(OT_CUR_VAL, 1)) - problem.add_condition("condition2", parameter2=(OT_CUR_VAL, 2)) + problem.add_condition("condition1", parameter1=1) + problem.add_condition("condition2", parameter2=2) exp_condition_df = pd.DataFrame( data={ CONDITION_ID: ["condition1", "condition2"], TARGET_ID: ["parameter1", "parameter2"], - OPERATION_TYPE: [OT_CUR_VAL, OT_CUR_VAL], TARGET_VALUE: [1.0, 2.0], } ) @@ -130,12 +128,14 @@ def test_modify_problem(): exp_observable_df = pd.DataFrame( data={ OBSERVABLE_ID: ["observable1", "observable2"], - OBSERVABLE_FORMULA: ["1", "2"], + OBSERVABLE_FORMULA: [1, 2], NOISE_FORMULA: [np.nan, 2.2], } ).set_index([OBSERVABLE_ID]) assert_frame_equal( - problem.observable_df, exp_observable_df, check_dtype=False + problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]], + exp_observable_df, + check_dtype=False, ) problem.add_parameter("parameter1", 1, 0, lb=1, ub=2) @@ -151,7 +151,11 @@ def test_modify_problem(): } ).set_index([PARAMETER_ID]) assert_frame_equal( - problem.parameter_df, exp_parameter_df, check_dtype=False + problem.parameter_df[ + [ESTIMATE, NOMINAL_VALUE, LOWER_BOUND, UPPER_BOUND] + ], + exp_parameter_df, + check_dtype=False, ) problem.add_mapping("new_petab_id", "some_model_entity_id") @@ -160,6 +164,7 @@ def test_modify_problem(): data={ PETAB_ENTITY_ID: ["new_petab_id"], MODEL_ENTITY_ID: ["some_model_entity_id"], + NAME: [None], } ).set_index([PETAB_ENTITY_ID]) assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False)