From 31da444d9142de4b1c8ecf25cb7519d01db584a2 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 17 Mar 2025 16:29:25 +0100 Subject: [PATCH 01/19] Misc fixes --- petab/v2/core.py | 10 +++++++--- petab/v2/lint.py | 5 ++++- petab/v2/problem.py | 8 +++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 3c23f652..94f614b9 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -411,17 +411,21 @@ class ExperimentPeriod(BaseModel): #: The start time of the period in time units as defined in the model. start: float = Field(alias=C.TIME) + # TODO: decide if optional #: The ID of the condition to be applied at the start time. condition_id: str = Field(alias=C.CONDITION_ID) #: :meta private: model_config = ConfigDict(populate_by_name=True) - @field_validator("condition_id") + @field_validator("condition_id", mode="before") @classmethod def _validate_id(cls, condition_id): - if not condition_id: - raise ValueError("ID must not be empty.") + # TODO to be decided if optional + if pd.isna(condition_id): + return "" + # if not condition_id: + # raise ValueError("ID must not be empty.") if not is_valid_identifier(condition_id): raise ValueError(f"Invalid ID: {condition_id}") return condition_id diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2deb0ebd..b8747708 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -545,6 +545,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: missing_conditions = set(required_conditions) - set( existing_conditions ) + # TODO NA allowed? + missing_conditions = {x for x in missing_conditions if not pd.isna(x)} if missing_conditions: return ValidationError( f"Experiment table contains conditions that are not present " @@ -842,7 +844,8 @@ def append_overrides(overrides): CheckExperimentTable(), CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True), CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), - CheckValidPetabIdColumn("experiment", CONDITION_ID), + # TODO: NAN allowed? + CheckValidPetabIdColumn("experiment", CONDITION_ID, ignore_nan=True), CheckExperimentConditionsExist(), CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), diff --git a/petab/v2/problem.py b/petab/v2/problem.py index d18b4b7c..a6ef665b 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -798,10 +798,12 @@ def add_condition( { CONDITION_ID: id_, TARGET_ID: target_id, - OPERATION_TYPE: op_type, - TARGET_VALUE: target_value, + OPERATION_TYPE: "setCurrentValue", + TARGET_VALUE: target_value + if not isinstance(target_value, tuple) + else target_value[1], } - for target_id, (op_type, target_value) in kwargs.items() + for target_id, target_value in kwargs.items() ] # TODO: is the condition name supported in v2? if name is not None: From 5b877014d97b7277720c3ce8a1d5646d75c6ef49 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 18 Mar 2025 15:59:19 +0100 Subject: [PATCH 02/19] dfs->objs --- petab/v2/core.py | 100 +++++++++++---- petab/v2/problem.py | 260 ++++++++++++++++++++++++++------------- pytest.ini | 5 +- tests/v2/test_core.py | 6 +- tests/v2/test_problem.py | 12 +- 5 files changed, 269 insertions(+), 114 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 94f614b9..62a6040f 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -27,7 +27,7 @@ "ObservableTransformation", "NoiseDistribution", "Change", - "ChangeSet", + "Condition", "ConditionsTable", "OperationType", "ExperimentPeriod", @@ -200,7 +200,25 @@ def from_df(cls, df: pd.DataFrame) -> ObservablesTable: def to_df(self) -> pd.DataFrame: """Convert the ObservablesTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["observables"]) + records = self.model_dump(by_alias=True)["observables"] + for record in records: + obs = record[C.OBSERVABLE_FORMULA] + noise = record[C.NOISE_FORMULA] + record[C.OBSERVABLE_FORMULA] = ( + None + if obs is None + else str(obs) + if not obs.is_number + else float(obs) + ) + record[C.NOISE_FORMULA] = ( + None + if noise is None + else str(noise) + if not noise.is_number + else float(noise) + ) + return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> ObservablesTable: @@ -211,7 +229,7 @@ def from_tsv(cls, file_path: str | Path) -> ObservablesTable: def to_tsv(self, file_path: str | Path) -> None: """Write the ObservablesTable to a TSV file.""" df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) + df.to_csv(file_path, sep="\t", index=True) def __add__(self, other: Observable) -> ObservablesTable: """Add an observable to the table.""" @@ -290,14 +308,14 @@ def _sympify(cls, v): return sympify_petab(v) -class ChangeSet(BaseModel): +class Condition(BaseModel): """A set of changes to the model or model state. A set of simultaneously occurring changes to the model or model state, corresponding to a perturbation of the underlying system. This corresponds to all rows of the PEtab conditions table with the same condition ID. - >>> ChangeSet( + >>> Condition( ... id="condition1", ... changes=[ ... Change( @@ -307,7 +325,7 @@ class ChangeSet(BaseModel): ... ) ... ], ... ) # doctest: +NORMALIZE_WHITESPACE - ChangeSet(id='condition1', changes=[Change(target_id='k1', + Condition(id='condition1', changes=[Change(target_id='k1', operation_type='setCurrentValue', target_value=10.0000000000000)]) """ @@ -328,13 +346,13 @@ def _validate_id(cls, v): raise ValueError(f"Invalid ID: {v}") return v - def __add__(self, other: Change) -> ChangeSet: + def __add__(self, other: Change) -> Condition: """Add a change to the set.""" if not isinstance(other, Change): raise TypeError("Can only add Change to ChangeSet") - return ChangeSet(id=self.id, changes=self.changes + [other]) + return Condition(id=self.id, changes=self.changes + [other]) - def __iadd__(self, other: Change) -> ChangeSet: + def __iadd__(self, other: Change) -> Condition: """Add a change to the set in place.""" if not isinstance(other, Change): raise TypeError("Can only add Change to ChangeSet") @@ -346,9 +364,9 @@ class ConditionsTable(BaseModel): """PEtab conditions table.""" #: List of conditions. - conditions: list[ChangeSet] = [] + conditions: list[Condition] = [] - def __getitem__(self, condition_id: str) -> ChangeSet: + def __getitem__(self, condition_id: str) -> Condition: """Get a condition by ID.""" for condition in self.conditions: if condition.id == condition_id: @@ -364,18 +382,28 @@ def from_df(cls, df: pd.DataFrame) -> ConditionsTable: conditions = [] for condition_id, sub_df in df.groupby(C.CONDITION_ID): changes = [Change(**row.to_dict()) for _, row in sub_df.iterrows()] - conditions.append(ChangeSet(id=condition_id, changes=changes)) + conditions.append(Condition(id=condition_id, changes=changes)) return cls(conditions=conditions) def to_df(self) -> pd.DataFrame: """Convert the ConditionsTable to a DataFrame.""" records = [ - {C.CONDITION_ID: condition.id, **change.model_dump()} + {C.CONDITION_ID: condition.id, **change.model_dump(by_alias=True)} for condition in self.conditions for change in condition.changes ] - return pd.DataFrame(records) + for record in records: + record[C.TARGET_VALUE] = ( + float(record[C.TARGET_VALUE]) + if record[C.TARGET_VALUE].is_number + else str(record[C.TARGET_VALUE]) + ) + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.CONDITION_DF_REQUIRED_COLS) + ) @classmethod def from_tsv(cls, file_path: str | Path) -> ConditionsTable: @@ -388,15 +416,15 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=False) - def __add__(self, other: ChangeSet) -> ConditionsTable: + def __add__(self, other: Condition) -> ConditionsTable: """Add a condition to the table.""" - if not isinstance(other, ChangeSet): + if not isinstance(other, Condition): raise TypeError("Can only add ChangeSet to ConditionsTable") return ConditionsTable(conditions=self.conditions + [other]) - def __iadd__(self, other: ChangeSet) -> ConditionsTable: + def __iadd__(self, other: Condition) -> ConditionsTable: """Add a condition to the table in place.""" - if not isinstance(other, ChangeSet): + if not isinstance(other, Condition): raise TypeError("Can only add ChangeSet to ConditionsTable") self.conditions.append(other) return self @@ -498,7 +526,19 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: def to_df(self) -> pd.DataFrame: """Convert the ExperimentsTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["experiments"]) + records = [ + { + C.EXPERIMENT_ID: experiment.id, + **period.model_dump(by_alias=True), + } + for experiment in self.experiments + for period in experiment.periods + ] + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.EXPERIMENT_DF_REQUIRED_COLS) + ) @classmethod def from_tsv(cls, file_path: str | Path) -> ExperimentsTable: @@ -617,7 +657,16 @@ def from_df( def to_df(self) -> pd.DataFrame: """Convert the MeasurementTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["measurements"]) + records = self.model_dump(by_alias=True)["measurements"] + for record in records: + record[C.OBSERVABLE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.OBSERVABLE_PARAMETERS]) + ) + record[C.NOISE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.NOISE_PARAMETERS]) + ) + + return pd.DataFrame(records) @classmethod def from_tsv(cls, file_path: str | Path) -> MeasurementTable: @@ -687,7 +736,12 @@ def from_df(cls, df: pd.DataFrame) -> MappingTable: def to_df(self) -> pd.DataFrame: """Convert the MappingTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["mappings"]) + res = ( + pd.DataFrame(self.model_dump(by_alias=True)["mappings"]) + if self.mappings + else pd.DataFrame(columns=C.MAPPING_DF_REQUIRED_COLS) + ) + return res.set_index([C.PETAB_ENTITY_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> MappingTable: @@ -778,7 +832,9 @@ def from_df(cls, df: pd.DataFrame) -> ParameterTable: def to_df(self) -> pd.DataFrame: """Convert the ParameterTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["parameters"]) + return pd.DataFrame( + self.model_dump(by_alias=True)["parameters"] + ).set_index([C.PARAMETER_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> ParameterTable: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index a6ef665b..383a0cb9 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -16,7 +16,6 @@ from pydantic import AnyUrl, BaseModel, Field from ..v1 import ( - core, mapping, measurements, observables, @@ -25,11 +24,12 @@ sampling, yaml, ) +from ..v1.core import concat_tables, get_visualization_df from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 from ..versions import parse_version -from . import conditions, experiments +from . import conditions, core, experiments if TYPE_CHECKING: from ..v2.lint import ValidationResultList, ValidationTask @@ -71,25 +71,18 @@ class Problem: def __init__( self, model: Model = None, - condition_df: pd.DataFrame = None, - experiment_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, + conditions_table: core.ConditionsTable = None, + experiments_table: core.ExperimentsTable = None, + observables_table: core.ObservablesTable = None, + measurement_table: core.MeasurementTable = None, + parameters_table: core.ParametersTable = None, + mapping_table: core.MappingTable = None, visualization_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, extensions_config: dict = None, config: ProblemConfig = None, ): from ..v2.lint import default_validation_tasks - self.condition_df: pd.DataFrame | None = condition_df - self.experiment_df: pd.DataFrame | None = experiment_df - self.measurement_df: pd.DataFrame | None = measurement_df - self.parameter_df: pd.DataFrame | None = parameter_df - self.visualization_df: pd.DataFrame | None = visualization_df - self.observable_df: pd.DataFrame | None = observable_df - self.mapping_df: pd.DataFrame | None = mapping_df self.model: Model | None = model self.extensions_config = extensions_config or {} self.validation_tasks: list[ValidationTask] = ( @@ -97,44 +90,26 @@ def __init__( ) self.config = config - from .core import ( - ChangeSet, - ConditionsTable, - Experiment, - ExperimentsTable, - MappingTable, - MeasurementTable, - Observable, - ObservablesTable, - ParameterTable, + self.observables_table = observables_table or core.ObservablesTable( + observables=[] ) - - self.observables_table: ObservablesTable = ObservablesTable.from_df( - self.observable_df + self.conditions_table = conditions_table or core.ConditionsTable( + conditions=[] ) - self.observables: list[Observable] = self.observables_table.observables - - self.conditions_table: ConditionsTable = ConditionsTable.from_df( - self.condition_df + self.experiments_table = experiments_table or core.ExperimentsTable( + experiments=[] ) - self.conditions: list[ChangeSet] = self.conditions_table.conditions - - self.experiments_table: ExperimentsTable = ExperimentsTable.from_df( - self.experiment_df + self.measurement_table = measurement_table or core.MeasurementTable( + measurements=[] ) - self.experiments: list[Experiment] = self.experiments_table.experiments - - self.measurement_table: MeasurementTable = MeasurementTable.from_df( - self.measurement_df, + self.mapping_table = mapping_table or core.MappingTable(mappings=[]) + self.parameter_table = parameters_table or core.ParameterTable( + parameters=[] ) - self.mapping_table: MappingTable = MappingTable.from_df( - self.mapping_df - ) - self.parameter_table: ParameterTable = ParameterTable.from_df( - self.parameter_df - ) - # TODO: visualization table + self.visualization_df = visualization_df + self.config = config + self.extensions_config = extensions_config def __str__(self): model = f"with model ({self.model})" if self.model else "without model" @@ -273,9 +248,7 @@ def get_path(filename): measurement_files = [get_path(f) for f in problem0.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( - core.concat_tables( - measurement_files, measurements.get_measurement_df - ) + concat_tables(measurement_files, measurements.get_measurement_df) if measurement_files else None ) @@ -283,7 +256,7 @@ def get_path(filename): condition_files = [get_path(f) for f in problem0.condition_files] # If there are multiple tables, we will merge them condition_df = ( - core.concat_tables(condition_files, conditions.get_condition_df) + concat_tables(condition_files, conditions.get_condition_df) if condition_files else None ) @@ -291,7 +264,7 @@ def get_path(filename): experiment_files = [get_path(f) for f in problem0.experiment_files] # If there are multiple tables, we will merge them experiment_df = ( - core.concat_tables(experiment_files, experiments.get_experiment_df) + concat_tables(experiment_files, experiments.get_experiment_df) if experiment_files else None ) @@ -301,7 +274,7 @@ def get_path(filename): ] # If there are multiple tables, we will merge them visualization_df = ( - core.concat_tables(visualization_files, core.get_visualization_df) + concat_tables(visualization_files, get_visualization_df) if visualization_files else None ) @@ -309,7 +282,7 @@ def get_path(filename): observable_files = [get_path(f) for f in problem0.observable_files] # If there are multiple tables, we will merge them observable_df = ( - core.concat_tables(observable_files, observables.get_observable_df) + concat_tables(observable_files, observables.get_observable_df) if observable_files else None ) @@ -317,12 +290,12 @@ def get_path(filename): mapping_files = [get_path(f) for f in problem0.mapping_files] # If there are multiple tables, we will merge them mapping_df = ( - core.concat_tables(mapping_files, mapping.get_mapping_df) + concat_tables(mapping_files, mapping.get_mapping_df) if mapping_files else None ) - return Problem( + return Problem.from_dfs( condition_df=condition_df, experiment_df=experiment_df, measurement_df=measurement_df, @@ -334,6 +307,54 @@ def get_path(filename): extensions_config=config.extensions, ) + @staticmethod + def from_dfs( + model: Model = None, + condition_df: pd.DataFrame = None, + experiment_df: pd.DataFrame = None, + measurement_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + visualization_df: pd.DataFrame = None, + observable_df: pd.DataFrame = None, + mapping_df: pd.DataFrame = None, + extensions_config: dict = None, + config: ProblemConfig = None, + ): + """ + Construct a PEtab problem from dataframes. + + Parameters: + condition_df: PEtab condition table + experiment_df: PEtab experiment table + measurement_df: PEtab measurement table + parameter_df: PEtab parameter table + observable_df: PEtab observable table + visualization_df: PEtab visualization table + mapping_df: PEtab mapping table + model: The underlying model + extensions_config: Information on the extensions used + """ + + observables_table = core.ObservablesTable.from_df(observable_df) + conditions_table = core.ConditionsTable.from_df(condition_df) + experiments_table = core.ExperimentsTable.from_df(experiment_df) + measurement_table = core.MeasurementTable.from_df(measurement_df) + mapping_table = core.MappingTable.from_df(mapping_df) + parameter_table = core.ParameterTable.from_df(parameter_df) + + return Problem( + model=model, + conditions_table=conditions_table, + experiments_table=experiments_table, + observables_table=observables_table, + measurement_table=measurement_table, + parameters_table=parameter_table, + mapping_table=mapping_table, + visualization_df=visualization_df, + extensions_config=extensions_config, + config=config, + ) + @staticmethod def from_combine(filename: Path | str) -> Problem: """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). @@ -390,6 +411,60 @@ def get_problem(problem: str | Path | Problem) -> Problem: "or a PEtab problem object." ) + @property + def condition_df(self) -> pd.DataFrame | None: + return self.conditions_table.to_df() if self.conditions_table else None + + @condition_df.setter + def condition_df(self, value: pd.DataFrame): + self.conditions_table = core.ConditionsTable.from_df(value) + + @property + def experiment_df(self) -> pd.DataFrame | None: + return ( + self.experiments_table.to_df() if self.experiments_table else None + ) + + @experiment_df.setter + def experiment_df(self, value: pd.DataFrame): + self.experiments_table = core.ExperimentsTable.from_df(value) + + @property + def measurement_df(self) -> pd.DataFrame | None: + return ( + self.measurement_table.to_df() if self.measurement_table else None + ) + + @measurement_df.setter + def measurement_df(self, value: pd.DataFrame): + self.measurement_table = core.MeasurementTable.from_df(value) + + @property + def parameter_df(self) -> pd.DataFrame | None: + return self.parameter_table.to_df() if self.parameter_table else None + + @parameter_df.setter + def parameter_df(self, value: pd.DataFrame): + self.parameter_table = core.ParameterTable.from_df(value) + + @property + def observable_df(self) -> pd.DataFrame | None: + return ( + self.observables_table.to_df() if self.observables_table else None + ) + + @observable_df.setter + def observable_df(self, value: pd.DataFrame): + self.observables_table = core.ObservablesTable.from_df(value) + + @property + def mapping_df(self) -> pd.DataFrame | None: + return self.mapping_table.to_df() if self.mapping_table else None + + @mapping_df.setter + def mapping_df(self, value: pd.DataFrame): + self.mapping_table = core.MappingTable.from_df(value) + def get_optimization_parameters(self) -> list[str]: """ Return list of optimization parameter IDs. @@ -839,25 +914,19 @@ def add_observable( """ record = { - OBSERVABLE_ID: [id_], - OBSERVABLE_FORMULA: [formula], + OBSERVABLE_ID: id_, + OBSERVABLE_FORMULA: formula, } if name is not None: - record[OBSERVABLE_NAME] = [name] + record[OBSERVABLE_NAME] = name if noise_formula is not None: - record[NOISE_FORMULA] = [noise_formula] + record[NOISE_FORMULA] = noise_formula if noise_distribution is not None: - record[NOISE_DISTRIBUTION] = [noise_distribution] + record[NOISE_DISTRIBUTION] = noise_distribution if transform is not None: - record[OBSERVABLE_TRANSFORMATION] = [transform] + record[OBSERVABLE_TRANSFORMATION] = transform record.update(kwargs) - - tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID]) - self.observable_df = ( - pd.concat([self.observable_df, tmp_df]) - if self.observable_df is not None - else tmp_df - ) + self.observables_table += core.Observable(**record) def add_parameter( self, @@ -890,42 +959,37 @@ def add_parameter( kwargs: additional columns/values to add to the parameter table """ record = { - PARAMETER_ID: [id_], + PARAMETER_ID: id_, } if estimate is not None: - record[ESTIMATE] = [int(estimate)] + record[ESTIMATE] = int(estimate) if nominal_value is not None: - record[NOMINAL_VALUE] = [nominal_value] + record[NOMINAL_VALUE] = nominal_value if scale is not None: - record[PARAMETER_SCALE] = [scale] + record[PARAMETER_SCALE] = scale if lb is not None: - record[LOWER_BOUND] = [lb] + record[LOWER_BOUND] = lb if ub is not None: - record[UPPER_BOUND] = [ub] + record[UPPER_BOUND] = ub if init_prior_type is not None: - record[INITIALIZATION_PRIOR_TYPE] = [init_prior_type] + record[INITIALIZATION_PRIOR_TYPE] = init_prior_type if init_prior_pars is not None: if not isinstance(init_prior_pars, str): init_prior_pars = PARAMETER_SEPARATOR.join( map(str, init_prior_pars) ) - record[INITIALIZATION_PRIOR_PARAMETERS] = [init_prior_pars] + record[INITIALIZATION_PRIOR_PARAMETERS] = init_prior_pars if obj_prior_type is not None: - record[OBJECTIVE_PRIOR_TYPE] = [obj_prior_type] + record[OBJECTIVE_PRIOR_TYPE] = obj_prior_type if obj_prior_pars is not None: if not isinstance(obj_prior_pars, str): obj_prior_pars = PARAMETER_SEPARATOR.join( map(str, obj_prior_pars) ) - record[OBJECTIVE_PRIOR_PARAMETERS] = [obj_prior_pars] + record[OBJECTIVE_PRIOR_PARAMETERS] = obj_prior_pars record.update(kwargs) - tmp_df = pd.DataFrame(record).set_index([PARAMETER_ID]) - self.parameter_df = ( - pd.concat([self.parameter_df, tmp_df]) - if self.parameter_df is not None - else tmp_df - ) + self.parameter_table += core.Parameter(**record) def add_measurement( self, @@ -1014,6 +1078,32 @@ def add_experiment(self, id_: str, *args): else tmp_df ) + def __iadd__(self, other): + """Add Observable, Parameter, Measurement, Condition, or Experiment""" + from .core import ( + Condition, + Experiment, + Measurement, + Observable, + Parameter, + ) + + if isinstance(other, Observable): + self.observables_table += other + elif isinstance(other, Parameter): + self.parameter_table += other + elif isinstance(other, Measurement): + self.measurement_table += other + elif isinstance(other, Condition): + self.conditions_table += other + elif isinstance(other, Experiment): + self.experiments_table += other + else: + raise ValueError( + f"Cannot add object of type {type(other)} to Problem." + ) + return self + class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" diff --git a/pytest.ini b/pytest.ini index 4aa44158..52eea778 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] -addopts = --doctest-modules +addopts = --doctest-modules --durations=0 --durations-min=10 +testpaths = + petab + tests filterwarnings = error # TODO: until tests are reorganized for petab.v1 diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index a7eae851..39e65c37 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -3,7 +3,7 @@ from petab.v2.core import ( Change, - ChangeSet, + Condition, ConditionsTable, Experiment, ExperimentPeriod, @@ -59,11 +59,11 @@ def test_conditions_table_add_changeset(): conditions_table = ConditionsTable() assert conditions_table.conditions == [] - c1 = ChangeSet( + c1 = Condition( id="condition1", changes=[Change(operation_type=OperationType.NO_CHANGE)], ) - c2 = ChangeSet( + c2 = Condition( id="condition2", changes=[Change(operation_type=OperationType.NO_CHANGE)], ) diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 04e394ad..b3cc4e23 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -130,12 +130,14 @@ def test_modify_problem(): exp_observable_df = pd.DataFrame( data={ OBSERVABLE_ID: ["observable1", "observable2"], - OBSERVABLE_FORMULA: ["1", "2"], + OBSERVABLE_FORMULA: [1, 2], NOISE_FORMULA: [np.nan, 2.2], } ).set_index([OBSERVABLE_ID]) assert_frame_equal( - problem.observable_df, exp_observable_df, check_dtype=False + problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]], + exp_observable_df, + check_dtype=False, ) problem.add_parameter("parameter1", 1, 0, lb=1, ub=2) @@ -151,7 +153,11 @@ def test_modify_problem(): } ).set_index([PARAMETER_ID]) assert_frame_equal( - problem.parameter_df, exp_parameter_df, check_dtype=False + problem.parameter_df[ + [ESTIMATE, NOMINAL_VALUE, LOWER_BOUND, UPPER_BOUND] + ], + exp_parameter_df, + check_dtype=False, ) problem.add_mapping("new_petab_id", "some_model_entity_id") From 2e26c8d979560a1c2baa04eba7b3c137006e80e1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 20 Mar 2025 14:08:12 +0100 Subject: [PATCH 03/19] .. --- petab/v2/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/petab/v2/core.py b/petab/v2/core.py index 62a6040f..0d43a33d 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -207,6 +207,9 @@ def to_df(self) -> pd.DataFrame: record[C.OBSERVABLE_FORMULA] = ( None if obs is None + # TODO: we need a custom printer for sympy expressions + # to avoid '**' + # https://github.com/PEtab-dev/libpetab-python/issues/362 else str(obs) if not obs.is_number else float(obs) From e5e109718ff61114ad4f1034eaac947dd6135fdc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 21 Mar 2025 14:31:09 +0100 Subject: [PATCH 04/19] lint --- petab/v2/core.py | 26 +++++++++++++-- petab/v2/lint.py | 78 +++++++++++++++++-------------------------- petab/v2/problem.py | 47 ++++++++++++++++++-------- tests/v2/test_core.py | 6 ++-- tests/v2/test_lint.py | 17 +++------- 5 files changed, 94 insertions(+), 80 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 0d43a33d..b12c3ce7 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -441,7 +441,8 @@ class ExperimentPeriod(BaseModel): """ #: The start time of the period in time units as defined in the model. - start: float = Field(alias=C.TIME) + # TODO: Only finite times and -inf are allowed as start time + time: float = Field(alias=C.TIME) # TODO: decide if optional #: The ID of the condition to be applied at the start time. condition_id: str = Field(alias=C.CONDITION_ID) @@ -519,7 +520,7 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): periods = [ ExperimentPeriod( - start=row[C.TIME], condition_id=row[C.CONDITION_ID] + time=row[C.TIME], condition_id=row[C.CONDITION_ID] ) for _, row in cur_exp_df.iterrows() ] @@ -567,6 +568,13 @@ def __iadd__(self, other: Experiment) -> ExperimentsTable: self.experiments.append(other) return self + def __getitem__(self, item): + """Get an experiment by ID.""" + for experiment in self.experiments: + if experiment.id == item: + return experiment + raise KeyError(f"Experiment ID {item} not found") + class Measurement(BaseModel): """A measurement. @@ -770,6 +778,13 @@ def __iadd__(self, other: Mapping) -> MappingTable: self.mappings.append(other) return self + def __getitem__(self, petab_id: str) -> Mapping: + """Get a mapping by PEtab ID.""" + for mapping in self.mappings: + if mapping.petab_id == petab_id: + return mapping + raise KeyError(f"PEtab ID {petab_id} not found") + class Parameter(BaseModel): """Parameter definition.""" @@ -862,3 +877,10 @@ def __iadd__(self, other: Parameter) -> ParameterTable: raise TypeError("Can only add Parameter to ParameterTable") self.parameters.append(other) return self + + def __getitem__(self, item) -> Parameter: + """Get a parameter by ID.""" + for parameter in self.parameters: + if parameter.id == item: + return parameter + raise KeyError(f"Parameter ID {item} not found") diff --git a/petab/v2/lint.py b/petab/v2/lint.py index b8747708..5e753aa6 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -4,7 +4,7 @@ import logging from abc import ABC, abstractmethod -from collections import OrderedDict +from collections import Counter, OrderedDict from collections.abc import Set from dataclasses import dataclass, field from enum import IntEnum @@ -496,29 +496,19 @@ class CheckExperimentTable(ValidationTask): """A task to validate the experiment table of a PEtab problem.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.experiment_df is None: - return - - df = problem.experiment_df - - try: - _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment") - except AssertionError as e: - return ValidationError(str(e)) + messages = [] + for experiment in problem.experiments_table.experiments: + # Check that there are no duplicate timepoints + counter = Counter(period.time for period in experiment.periods) + duplicates = {time for time, count in counter.items() if count > 1} + if duplicates: + messages.append( + f"Experiment {experiment.id} contains duplicate " + f"timepoints: {duplicates}" + ) - # valid timepoints - invalid = [] - for time in df[TIME].values: - try: - time = float(time) - if not np.isfinite(time) and time != -np.inf: - invalid.append(time) - except ValueError: - invalid.append(time) - if invalid: - return ValidationError( - f"Invalid timepoints in experiment table: {invalid}" - ) + if messages: + return ValidationError("\n".join(messages)) class CheckExperimentConditionsExist(ValidationTask): @@ -526,32 +516,24 @@ class CheckExperimentConditionsExist(ValidationTask): in the condition table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.experiment_df is None: - return - - if ( - problem.condition_df is None - and problem.experiment_df is not None - and not problem.experiment_df.empty - ): - return ValidationError( - "Experiment table is non-empty, " - "but condition table is missing." - ) - - required_conditions = problem.experiment_df[CONDITION_ID].unique() - existing_conditions = problem.condition_df[CONDITION_ID].unique() + messages = [] + available_conditions = { + c.id + for c in problem.conditions_table.conditions + if not pd.isna(c.id) + } + for experiment in problem.experiments_table.experiments: + missing_conditions = { + period.condition_id for period in experiment.periods + } - available_conditions + if missing_conditions: + messages.append( + f"Experiment {experiment.id} requires conditions that are " + f"not present in the condition table: {missing_conditions}" + ) - missing_conditions = set(required_conditions) - set( - existing_conditions - ) - # TODO NA allowed? - missing_conditions = {x for x in missing_conditions if not pd.isna(x)} - if missing_conditions: - return ValidationError( - f"Experiment table contains conditions that are not present " - f"in the condition table: {missing_conditions}" - ) + if messages: + return ValidationError("\n".join(messages)) class CheckAllParametersPresentInParameterTable(ValidationTask): diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 383a0cb9..4132abf1 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -153,6 +153,32 @@ def __str__(self): f"{observables}, {measurements}, {parameters}" ) + def __getitem__(self, key): + """Get PEtab entity by ID. + + This allows accessing PEtab entities such as conditions, experiments, + observables, and parameters by their ID. + + Accessing model entities is not currently not supported. + """ + for table in ( + self.conditions_table, + self.experiments_table, + self.observables_table, + self.measurement_table, + self.parameter_table, + self.mapping_table, + ): + if table is not None: + try: + return table[key] + except KeyError: + pass + + raise KeyError( + f"Entity with ID '{key}' not found in the PEtab problem" + ) + @staticmethod def from_yaml( yaml_config: dict | Path | str, base_path: str | Path = None @@ -1062,20 +1088,13 @@ def add_experiment(self, id_: str, *args): "Arguments must be pairs of timepoints and condition IDs." ) - records = [] - for i in range(0, len(args), 2): - records.append( - { - EXPERIMENT_ID: id_, - TIME: args[i], - CONDITION_ID: args[i + 1], - } - ) - tmp_df = pd.DataFrame(records) - self.experiment_df = ( - pd.concat([self.experiment_df, tmp_df]) - if self.experiment_df is not None - else tmp_df + periods = [ + core.ExperimentPeriod(time=args[i], condition_id=args[i + 1]) + for i in range(0, len(args), 2) + ] + + self.experiments_table.experiments.append( + core.Experiment(id=id_, periods=periods) ) def __iadd__(self, other): diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 39e65c37..07eeb6c3 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -42,9 +42,9 @@ def test_experiment_add_periods(): exp = Experiment(id="exp1") assert exp.periods == [] - p1 = ExperimentPeriod(start=0, condition_id="p1") - p2 = ExperimentPeriod(start=1, condition_id="p2") - p3 = ExperimentPeriod(start=2, condition_id="p3") + p1 = ExperimentPeriod(time=0, condition_id="p1") + p2 = ExperimentPeriod(time=1, condition_id="p2") + p3 = ExperimentPeriod(time=2, condition_id="p3") exp += p1 exp += p2 diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index db0c402a..33cdb300 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -10,23 +10,14 @@ def test_check_experiments(): """Test ``CheckExperimentTable``.""" problem = Problem() - problem.add_experiment("e1", 0, "c1", 1, "c2") - problem.add_experiment("e2", "-inf", "c1", 1, "c2") - assert problem.experiment_df.shape == (4, 3) check = CheckExperimentTable() assert check.run(problem) is None - assert check.run(Problem()) is None - - tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.loc[0, TIME] = "invalid" - assert check.run(tmp_problem) is not None - - tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.loc[0, TIME] = "inf" - assert check.run(tmp_problem) is not None + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_experiment("e2", "-inf", "c1", 1, "c2") + assert check.run(problem) is None tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.drop(columns=[TIME], inplace=True) + tmp_problem["e1"].periods[0].time = tmp_problem["e1"].periods[1].time assert check.run(tmp_problem) is not None From 7ec2a9ba0cb99caa9ad46f1ede7932246acebea1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 21 Mar 2025 16:12:25 +0100 Subject: [PATCH 05/19] no conditionName, no operationType --- petab/v1/lint.py | 11 +++-- petab/v1/math/sympify.py | 8 +++- petab/v2/C.py | 17 ++------ petab/v2/core.py | 93 ++++++++++++++++++++++------------------ petab/v2/lint.py | 91 ++++++++++++++++++++++++++++++++------- petab/v2/petab1to2.py | 7 ++- petab/v2/problem.py | 76 +++++++++++++++----------------- tests/v2/test_core.py | 9 ++-- tests/v2/test_problem.py | 9 ++-- 9 files changed, 188 insertions(+), 133 deletions(-) diff --git a/petab/v1/lint.py b/petab/v1/lint.py index b2260b83..acc1a523 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -1041,10 +1041,13 @@ def assert_model_parameters_in_condition_or_parameter_table( mapping_df[MODEL_ENTITY_ID], strict=True, ) - # mapping table entities mapping to already allowed parameters - if to_id in allowed_in_condition_cols - # mapping table entities mapping to species - or model.is_state_variable(to_id) + if not pd.isna(to_id) + and ( + # mapping table entities mapping to already allowed parameters + to_id in allowed_in_condition_cols + # mapping table entities mapping to species + or model.is_state_variable(to_id) + ) } allowed_in_parameter_table = ( diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py index cc81a000..2f6aabf0 100644 --- a/petab/v1/math/sympify.py +++ b/petab/v1/math/sympify.py @@ -31,9 +31,13 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: if isinstance(expr, float) or isinstance(expr, np.floating): return sp.Float(expr) - # Set error listeners - input_stream = InputStream(expr) + try: + input_stream = InputStream(expr) + except TypeError as e: + raise TypeError(f"Error parsing {expr!r}: {e.args[0]}") from e + lexer = PetabMathExprLexer(input_stream) + # Set error listeners lexer.removeErrorListeners() lexer.addErrorListener(MathErrorListener()) diff --git a/petab/v2/C.py b/petab/v2/C.py index 617977c1..545124a4 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -125,28 +125,14 @@ #: Condition ID column in the condition table CONDITION_ID = "conditionId" -# TODO: removed? -#: Condition name column in the condition table -CONDITION_NAME = "conditionName" #: Column in the condition table with the ID of an entity that is changed TARGET_ID = "targetId" -#: Column in the condition table with the operation type -OPERATION_TYPE = "operationType" #: Column in the condition table with the new value of the target entity TARGET_VALUE = "targetValue" -# operation types: -OT_CUR_VAL = "setCurrentValue" -OT_NO_CHANGE = "noChange" - -OPERATION_TYPES = [ - OT_CUR_VAL, - OT_NO_CHANGE, -] CONDITION_DF_COLS = [ CONDITION_ID, TARGET_ID, - OPERATION_TYPE, TARGET_VALUE, ] @@ -382,6 +368,9 @@ PETAB_ENTITY_ID = "petabEntityId" #: Model entity ID column in the mapping table MODEL_ENTITY_ID = "modelEntityId" +#: Arbitrary name +NAME = "name" + #: Required columns of the mapping table MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] diff --git a/petab/v2/core.py b/petab/v2/core.py index b12c3ce7..a27c82f1 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,14 +2,18 @@ from __future__ import annotations +from collections.abc import Sequence from enum import Enum from pathlib import Path +from typing import Annotated import numpy as np import pandas as pd import sympy as sp from pydantic import ( + AfterValidator, BaseModel, + BeforeValidator, ConfigDict, Field, ValidationInfo, @@ -29,7 +33,6 @@ "Change", "Condition", "ConditionsTable", - "OperationType", "ExperimentPeriod", "Experiment", "ExperimentsTable", @@ -43,6 +46,20 @@ ] +def is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != -np.inf: + raise ValueError( + f"{info.field_name} value must be finite or -inf but got {v}" + ) + return v + + +def _convert_nan_to_none(v): + if isinstance(v, float) and np.isnan(v): + return None + return v + + class ObservableTransformation(str, Enum): """Observable transformation types. @@ -248,16 +265,6 @@ def __iadd__(self, other: Observable) -> ObservablesTable: return self -# TODO remove?! -class OperationType(str, Enum): - """Operation types for model changes in the PEtab conditions table.""" - - # TODO update names - SET_CURRENT_VALUE = "setCurrentValue" - NO_CHANGE = "noChange" - ... - - class Change(BaseModel): """A change to the model or model state. @@ -266,17 +273,13 @@ class Change(BaseModel): >>> Change( ... target_id="k1", - ... operation_type=OperationType.SET_CURRENT_VALUE, ... target_value="10", ... ) # doctest: +NORMALIZE_WHITESPACE - Change(target_id='k1', operation_type='setCurrentValue', - target_value=10.0000000000000) + Change(target_id='k1', target_value=10.0000000000000) """ #: The ID of the target entity to change. target_id: str | None = Field(alias=C.TARGET_ID, default=None) - # TODO: remove?! - operation_type: OperationType = Field(alias=C.OPERATION_TYPE) #: The value to set the target entity to. target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) @@ -290,14 +293,11 @@ class Change(BaseModel): @model_validator(mode="before") @classmethod def _validate_id(cls, data: dict): - if ( - data.get("operation_type", data.get(C.OPERATION_TYPE)) - != C.OT_NO_CHANGE - ): - target_id = data.get("target_id", data.get(C.TARGET_ID)) - - if not is_valid_identifier(target_id): - raise ValueError(f"Invalid ID: {target_id}") + target_id = data.get("target_id", data.get(C.TARGET_ID)) + + if not is_valid_identifier(target_id): + raise ValueError(f"Invalid ID: {target_id}") + return data @field_validator("target_value", mode="before") @@ -323,13 +323,12 @@ class Condition(BaseModel): ... changes=[ ... Change( ... target_id="k1", - ... operation_type=OperationType.SET_CURRENT_VALUE, ... target_value="10", ... ) ... ], ... ) # doctest: +NORMALIZE_WHITESPACE - Condition(id='condition1', changes=[Change(target_id='k1', - operation_type='setCurrentValue', target_value=10.0000000000000)]) + Condition(id='condition1', + changes=[Change(target_id='k1', target_value=10.0000000000000)]) """ #: The condition ID. @@ -352,13 +351,13 @@ def _validate_id(cls, v): def __add__(self, other: Change) -> Condition: """Add a change to the set.""" if not isinstance(other, Change): - raise TypeError("Can only add Change to ChangeSet") + raise TypeError("Can only add Change to Condition") return Condition(id=self.id, changes=self.changes + [other]) def __iadd__(self, other: Change) -> Condition: """Add a change to the set in place.""" if not isinstance(other, Change): - raise TypeError("Can only add Change to ChangeSet") + raise TypeError("Can only add Change to Condition") self.changes.append(other) return self @@ -379,11 +378,11 @@ def __getitem__(self, condition_id: str) -> Condition: @classmethod def from_df(cls, df: pd.DataFrame) -> ConditionsTable: """Create a ConditionsTable from a DataFrame.""" - if df is None: + if df is None or df.empty: return cls(conditions=[]) conditions = [] - for condition_id, sub_df in df.groupby(C.CONDITION_ID): + for condition_id, sub_df in df.reset_index().groupby(C.CONDITION_ID): changes = [Change(**row.to_dict()) for _, row in sub_df.iterrows()] conditions.append(Condition(id=condition_id, changes=changes)) @@ -422,13 +421,13 @@ def to_tsv(self, file_path: str | Path) -> None: def __add__(self, other: Condition) -> ConditionsTable: """Add a condition to the table.""" if not isinstance(other, Condition): - raise TypeError("Can only add ChangeSet to ConditionsTable") + raise TypeError("Can only add Conditions to ConditionsTable") return ConditionsTable(conditions=self.conditions + [other]) def __iadd__(self, other: Condition) -> ConditionsTable: """Add a condition to the table in place.""" if not isinstance(other, Condition): - raise TypeError("Can only add ChangeSet to ConditionsTable") + raise TypeError("Can only add Conditions to ConditionsTable") self.conditions.append(other) return self @@ -441,11 +440,11 @@ class ExperimentPeriod(BaseModel): """ #: The start time of the period in time units as defined in the model. - # TODO: Only finite times and -inf are allowed as start time - time: float = Field(alias=C.TIME) - # TODO: decide if optional + time: Annotated[float, AfterValidator(is_finite_or_neg_inf)] = Field( + alias=C.TIME + ) #: The ID of the condition to be applied at the start time. - condition_id: str = Field(alias=C.CONDITION_ID) + condition_id: str | None = Field(alias=C.CONDITION_ID, default=None) #: :meta private: model_config = ConfigDict(populate_by_name=True) @@ -453,9 +452,8 @@ class ExperimentPeriod(BaseModel): @field_validator("condition_id", mode="before") @classmethod def _validate_id(cls, condition_id): - # TODO to be decided if optional - if pd.isna(condition_id): - return "" + if pd.isna(condition_id) or not condition_id: + return None # if not condition_id: # raise ValueError("ID must not be empty.") if not is_valid_identifier(condition_id): @@ -633,12 +631,17 @@ def _validate_id(cls, v, info: ValidationInfo): ) @classmethod def _sympify_list(cls, v): + if v is None: + return [] + if isinstance(v, float) and np.isnan(v): return [] + if isinstance(v, str): v = v.split(C.PARAMETER_SEPARATOR) - else: + elif not isinstance(v, Sequence): v = [v] + return [sympify_petab(x) for x in v] @@ -710,7 +713,13 @@ class Mapping(BaseModel): #: PEtab entity ID. petab_id: str = Field(alias=C.PETAB_ENTITY_ID) #: Model entity ID. - model_id: str = Field(alias=C.MODEL_ENTITY_ID) + model_id: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = ( + Field(alias=C.MODEL_ENTITY_ID, default=None) + ) + #: Arbitrary name + name: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.NAME, default=None + ) #: :meta private: model_config = ConfigDict(populate_by_name=True) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 5e753aa6..30115130 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -101,6 +101,18 @@ def __post_init__(self): def __str__(self): return f"{self.level.name}: {self.message}" + def _get_task_name(self): + """Get the name of the ValidationTask that raised this error.""" + import inspect + + # walk up the stack until we find the ValidationTask.run method + for frame_info in inspect.stack(): + frame = frame_info.frame + if "self" in frame.f_locals: + task = frame.f_locals["self"] + if isinstance(task, ValidationTask): + return task.__class__.__name__ + @dataclass class ValidationError(ValidationIssue): @@ -115,17 +127,19 @@ def __post_init__(self): if self.task is None: self.task = self._get_task_name() - def _get_task_name(self): - """Get the name of the ValidationTask that raised this error.""" - import inspect - # walk up the stack until we find the ValidationTask.run method - for frame_info in inspect.stack(): - frame = frame_info.frame - if "self" in frame.f_locals: - task = frame.f_locals["self"] - if isinstance(task, ValidationTask): - return task.__class__.__name__ +@dataclass +class ValidationWarning(ValidationIssue): + """A validation result with level WARNING.""" + + level: ValidationIssueSeverity = field( + default=ValidationIssueSeverity.WARNING, init=False + ) + task: str | None = None + + def __post_init__(self): + if self.task is None: + self.task = self._get_task_name() class ValidationResultList(list[ValidationIssue]): @@ -518,13 +532,13 @@ class CheckExperimentConditionsExist(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: messages = [] available_conditions = { - c.id - for c in problem.conditions_table.conditions - if not pd.isna(c.id) + c.id for c in problem.conditions_table.conditions } for experiment in problem.experiments_table.experiments: missing_conditions = { - period.condition_id for period in experiment.periods + period.condition_id + for period in experiment.periods + if period.condition_id is not None } - available_conditions if missing_conditions: messages.append( @@ -617,6 +631,51 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) +class CheckUnusedExperiments(ValidationTask): + """A task to check for experiments that are not used in the measurements + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_experiments = { + m.experiment_id + for m in problem.measurement_table.measurements + if m.experiment_id is not None + } + available_experiments = { + e.id for e in problem.experiments_table.experiments + } + + unused_experiments = available_experiments - used_experiments + if unused_experiments: + return ValidationWarning( + f"Experiments {unused_experiments} are not used in the " + "measurements table." + ) + + +class CheckUnusedConditions(ValidationTask): + """A task to check for conditions that are not used in the experiments + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_conditions = { + p.condition_id + for e in problem.experiments_table.experiments + for p in e.periods + if p.condition_id is not None + } + available_conditions = { + c.id for c in problem.conditions_table.conditions + } + + unused_conditions = available_conditions - used_conditions + if unused_conditions: + return ValidationWarning( + f"Conditions {unused_conditions} are not used in the " + "experiments table." + ) + + class CheckVisualizationTable(ValidationTask): """A task to validate the visualization table of a PEtab problem.""" @@ -672,7 +731,7 @@ def get_valid_parameters_for_parameter_table( blackset |= placeholders if condition_df is not None: - blackset |= set(condition_df.columns.values) - {CONDITION_NAME} + blackset |= set(condition_df.columns.values) # don't use sets here, to have deterministic ordering, # e.g. for creating parameter tables @@ -836,4 +895,6 @@ def append_overrides(overrides): # TODO: atomize checks, update to long condition table, re-enable # CheckVisualizationTable(), CheckValidParameterInConditionOrParameterTable(), + CheckUnusedExperiments(), + CheckUnusedConditions(), ] diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 7f675db0..102097c1 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -226,8 +226,9 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: validation_issues = v2.lint_problem(new_yaml_file) if validation_issues: + validation_issues = "\n".join(map(str, validation_issues)) raise ValueError( - "Generated PEtab v2 problem did not pass linting: " + "The generated PEtab v2 problem did not pass linting: " f"{validation_issues}" ) @@ -287,7 +288,7 @@ def v1v2_condition_df( condition_df = condition_df.copy().reset_index() with suppress(KeyError): # conditionName was dropped in PEtab v2 - condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True) + condition_df.drop(columns=[v1.C.CONDITION_NAME], inplace=True) condition_df = condition_df.melt( id_vars=[v1.C.CONDITION_ID], @@ -301,7 +302,6 @@ def v1v2_condition_df( columns=[ v2.C.CONDITION_ID, v2.C.TARGET_ID, - v2.C.OPERATION_TYPE, v2.C.TARGET_VALUE, ] ) @@ -320,5 +320,4 @@ def v1v2_condition_df( f"Unable to determine value type {target} in the condition " "table." ) - condition_df[v2.C.OPERATION_TYPE] = v2.C.OT_CUR_VAL return condition_df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 4132abf1..bbf28fd9 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -13,6 +13,7 @@ from typing import TYPE_CHECKING import pandas as pd +import sympy as sp from pydantic import AnyUrl, BaseModel, Field from ..v1 import ( @@ -883,7 +884,7 @@ def validate( return validation_results def add_condition( - self, id_: str, name: str = None, **kwargs: tuple[str, Number | str] + self, id_: str, name: str = None, **kwargs: Number | str | sp.Expr ): """Add a simulation condition to the problem. @@ -895,27 +896,20 @@ def add_condition( """ if not kwargs: return - records = [ - { - CONDITION_ID: id_, - TARGET_ID: target_id, - OPERATION_TYPE: "setCurrentValue", - TARGET_VALUE: target_value - if not isinstance(target_value, tuple) - else target_value[1], - } + changes = [ + core.Change(target_id=target_id, target_value=target_value) for target_id, target_value in kwargs.items() ] - # TODO: is the condition name supported in v2? - if name is not None: - for record in records: - record[CONDITION_NAME] = [name] - tmp_df = pd.DataFrame(records) - self.condition_df = ( - pd.concat([self.condition_df, tmp_df], ignore_index=True) - if self.condition_df is not None - else tmp_df + self.conditions_table.conditions.append( + core.Condition(id=id_, changes=changes) ) + if name is not None: + self.mapping_table.mappings.append( + core.Mapping( + petab_id=id_, + name=name, + ) + ) def add_observable( self, @@ -1023,8 +1017,8 @@ def add_measurement( experiment_id: str, time: float, measurement: float, - observable_parameters: Sequence[str | float] = None, - noise_parameters: Sequence[str | float] = None, + observable_parameters: Sequence[str | float] | str | float = None, + noise_parameters: Sequence[str | float] | str | float = None, ): """Add a measurement to the problem. @@ -1036,27 +1030,25 @@ def add_measurement( observable_parameters: The observable parameters noise_parameters: The noise parameters """ - record = { - OBSERVABLE_ID: [obs_id], - EXPERIMENT_ID: [experiment_id], - TIME: [time], - MEASUREMENT: [measurement], - } - if observable_parameters is not None: - record[OBSERVABLE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(map(str, observable_parameters)) - ] - if noise_parameters is not None: - record[NOISE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(map(str, noise_parameters)) - ] - - tmp_df = pd.DataFrame(record) - self.measurement_df = ( - pd.concat([self.measurement_df, tmp_df]) - if self.measurement_df is not None - else tmp_df - ).reset_index(drop=True) + if observable_parameters is not None and not isinstance( + observable_parameters, Sequence + ): + observable_parameters = [observable_parameters] + if noise_parameters is not None and not isinstance( + noise_parameters, Sequence + ): + noise_parameters = [noise_parameters] + + self.measurement_table.measurements.append( + core.Measurement( + observable_id=obs_id, + experiment_id=experiment_id, + time=time, + measurement=measurement, + observable_parameters=observable_parameters, + noise_parameters=noise_parameters, + ) + ) def add_mapping(self, petab_id: str, model_id: str): """Add a mapping table entry to the problem. diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 07eeb6c3..672b733a 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -1,6 +1,8 @@ import tempfile from pathlib import Path +import sympy as sp + from petab.v2.core import ( Change, Condition, @@ -8,7 +10,6 @@ Experiment, ExperimentPeriod, ObservablesTable, - OperationType, ) from petab.v2.petab1to2 import petab1to2 @@ -55,17 +56,17 @@ def test_experiment_add_periods(): assert exp.periods == [p1, p2] -def test_conditions_table_add_changeset(): +def test_conditions_table_add_changes(): conditions_table = ConditionsTable() assert conditions_table.conditions == [] c1 = Condition( id="condition1", - changes=[Change(operation_type=OperationType.NO_CHANGE)], + changes=[Change(target_id="k1", target_value=1)], ) c2 = Condition( id="condition2", - changes=[Change(operation_type=OperationType.NO_CHANGE)], + changes=[Change(target_id="k2", target_value=sp.sympify("2 * x"))], ) conditions_table += c1 diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index b3cc4e23..a08af22c 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -16,8 +16,6 @@ NOMINAL_VALUE, OBSERVABLE_FORMULA, OBSERVABLE_ID, - OPERATION_TYPE, - OT_CUR_VAL, PARAMETER_ID, PETAB_ENTITY_ID, TARGET_ID, @@ -73,7 +71,7 @@ def test_problem_from_yaml_multiple_files(): for i in (1, 2): problem = Problem() - problem.add_condition(f"condition{i}", parameter1=(OT_CUR_VAL, i)) + problem.add_condition(f"condition{i}", parameter1=i) petab.write_condition_df( problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) @@ -109,14 +107,13 @@ def test_problem_from_yaml_multiple_files(): def test_modify_problem(): """Test modifying a problem via the API.""" problem = Problem() - problem.add_condition("condition1", parameter1=(OT_CUR_VAL, 1)) - problem.add_condition("condition2", parameter2=(OT_CUR_VAL, 2)) + problem.add_condition("condition1", parameter1=1) + problem.add_condition("condition2", parameter2=2) exp_condition_df = pd.DataFrame( data={ CONDITION_ID: ["condition1", "condition2"], TARGET_ID: ["parameter1", "parameter2"], - OPERATION_TYPE: [OT_CUR_VAL, OT_CUR_VAL], TARGET_VALUE: [1.0, 2.0], } ) From 228a958c6f46e0e6a39e5b2cf09217c0eb46185f Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 22 Mar 2025 08:13:18 +0100 Subject: [PATCH 06/19] more linting --- petab/v2/conditions.py | 17 +- petab/v2/core.py | 63 ++++- petab/v2/lint.py | 519 ++++++++++++++++++++++----------------- petab/v2/petab1to2.py | 16 +- petab/v2/problem.py | 45 +--- tests/v2/test_problem.py | 2 + 6 files changed, 388 insertions(+), 274 deletions(-) diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 8d5a3067..90724d04 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -2,13 +2,13 @@ from __future__ import annotations +from itertools import chain from pathlib import Path import pandas as pd import sympy as sp from .. import v2 -from ..v1.math import sympify_petab from .C import * from .lint import assert_no_leading_trailing_whitespace @@ -59,10 +59,11 @@ def get_condition_table_free_symbols(problem: v2.Problem) -> set[sp.Basic]: :returns: Set of free symbols. """ - if problem.condition_df is None: - return set() - - free_symbols = set() - for target_value in problem.condition_df[TARGET_VALUE]: - free_symbols |= sympify_petab(target_value).free_symbols - return free_symbols + return set( + chain.from_iterable( + change.target_value.free_symbols + for condition in problem.conditions_table.conditions + for change in condition.changes + if change.target_value is not None + ) + ) diff --git a/petab/v2/core.py b/petab/v2/core.py index a27c82f1..05825b73 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,10 +2,11 @@ from __future__ import annotations +import re from collections.abc import Sequence from enum import Enum from pathlib import Path -from typing import Annotated +from typing import Annotated, Literal import numpy as np import pandas as pd @@ -46,7 +47,7 @@ ] -def is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: +def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: if not np.isfinite(v) and v != -np.inf: raise ValueError( f"{info.field_name} value must be finite or -inf but got {v}" @@ -54,6 +55,12 @@ def is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: return v +def _not_nan(v: float, info: ValidationInfo) -> float: + if np.isnan(v): + raise ValueError(f"{info.field_name} value must not be nan.") + return v + + def _convert_nan_to_none(v): if isinstance(v, float) and np.isnan(v): return None @@ -149,6 +156,11 @@ class Observable(BaseModel): alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL ) + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, populate_by_name=True + ) + @field_validator("id") @classmethod def _validate_id(cls, v): @@ -183,10 +195,31 @@ def _sympify(cls, v): return sympify_petab(v) - #: :meta private: - model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True - ) + def _placeholders( + self, type_: Literal["observable", "noise"] + ) -> set[sp.Symbol]: + # TODO: add field validator to check for 1-based consecutive numbering + t = f"{re.escape(type_)}Parameter" + o = re.escape(self.id) + pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") + formula = ( + self.formula + if type_ == "observable" + else self.noise_formula + if type_ == "noise" + else None + ) + return {s for s in formula.free_symbols if pattern.match(str(s))} + + @property + def observable_placeholders(self) -> set[sp.Symbol]: + """Placeholder symbols for the observable formula.""" + return self._placeholders("observable") + + @property + def noise_placeholders(self) -> set[sp.Symbol]: + """Placeholder symbols for the noise formula.""" + return self._placeholders("noise") class ObservablesTable(BaseModel): @@ -440,7 +473,7 @@ class ExperimentPeriod(BaseModel): """ #: The start time of the period in time units as defined in the model. - time: Annotated[float, AfterValidator(is_finite_or_neg_inf)] = Field( + time: Annotated[float, AfterValidator(_is_finite_or_neg_inf)] = Field( alias=C.TIME ) #: The ID of the condition to be applied at the start time. @@ -588,7 +621,9 @@ class Measurement(BaseModel): #: The time point of the measurement in time units as defined in the model. time: float = Field(alias=C.TIME) #: The measurement value. - measurement: float = Field(alias=C.MEASUREMENT) + measurement: Annotated[float, AfterValidator(_not_nan)] = Field( + alias=C.MEASUREMENT + ) #: Values for placeholder parameters in the observable formula. observable_parameters: list[sp.Basic] = Field( alias=C.OBSERVABLE_PARAMETERS, default_factory=list @@ -794,6 +829,13 @@ def __getitem__(self, petab_id: str) -> Mapping: return mapping raise KeyError(f"PEtab ID {petab_id} not found") + def get(self, petab_id, default=None): + """Get a mapping by PEtab ID or return a default value.""" + try: + return self[petab_id] + except KeyError: + return default + class Parameter(BaseModel): """Parameter definition.""" @@ -893,3 +935,8 @@ def __getitem__(self, item) -> Parameter: if parameter.id == item: return parameter raise KeyError(f"Parameter ID {item} not found") + + @property + def n_estimated(self) -> int: + """Number of estimated parameters.""" + return sum(p.estimate for p in self.parameters) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 30115130..75cf131c 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -12,13 +12,11 @@ import numpy as np import pandas as pd +import sympy as sp from .. import v2 from ..v1.lint import ( _check_df, - assert_measured_observables_defined, - assert_measurements_not_null, - assert_measurements_numeric, assert_model_parameters_in_condition_or_parameter_table, assert_no_leading_trailing_whitespace, assert_parameter_bounds_are_numeric, @@ -27,17 +25,11 @@ assert_parameter_prior_parameters_are_valid, assert_parameter_prior_type_is_valid, assert_parameter_scale_is_valid, - assert_unique_observable_ids, assert_unique_parameter_ids, check_ids, check_observable_df, check_parameter_bounds, ) -from ..v1.measurements import ( - assert_overrides_match_parameter_count, - split_parameter_replacement_list, -) -from ..v1.observables import get_output_parameters, get_placeholders from ..v1.visualize.lint import validate_visualization_df from ..v2.C import * from .problem import Problem @@ -51,9 +43,8 @@ "ValidationError", "ValidationTask", "CheckModel", - "CheckTableExists", - "CheckValidPetabIdColumn", - "CheckMeasurementTable", + "CheckProblemConfig", + "CheckPosLogMeasurements", "CheckConditionTable", "CheckObservableTable", "CheckParameterTable", @@ -62,6 +53,9 @@ "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", "CheckVisualizationTable", + "CheckUnusedExperiments", + "CheckObservablesDoNotShadowModelEntities", + "CheckUnusedConditions", "lint_problem", "default_validation_tasks", ] @@ -91,6 +85,7 @@ class ValidationIssue: level: ValidationIssueSeverity message: str + task: str | None = None def __post_init__(self): if not isinstance(self.level, ValidationIssueSeverity): @@ -121,7 +116,6 @@ class ValidationError(ValidationIssue): level: ValidationIssueSeverity = field( default=ValidationIssueSeverity.ERROR, init=False ) - task: str | None = None def __post_init__(self): if self.task is None: @@ -135,7 +129,6 @@ class ValidationWarning(ValidationIssue): level: ValidationIssueSeverity = field( default=ValidationIssueSeverity.WARNING, init=False ) - task: str | None = None def __post_init__(self): if self.task is None: @@ -153,17 +146,25 @@ def log( *, logger: logging.Logger = logger, min_level: ValidationIssueSeverity = ValidationIssueSeverity.INFO, + max_level: ValidationIssueSeverity = ValidationIssueSeverity.CRITICAL, ): - """Log the validation results.""" + """Log the validation results. + + :param logger: The logger to use for logging. + Defaults to the module logger. + :param min_level: The minimum severity level to log. + :param max_level: The maximum severity level to log. + """ for result in self: - if result.level < min_level: + if result.level < min_level or result.level > max_level: continue + msg = f"{result.level.name}: {result.message} [{result.task}]" if result.level == ValidationIssueSeverity.INFO: - logger.info(result.message) + logger.info(msg) elif result.level == ValidationIssueSeverity.WARNING: - logger.warning(result.message) + logger.warning(msg) elif result.level >= ValidationIssueSeverity.ERROR: - logger.error(result.message) + logger.error(msg) if not self: logger.info("PEtab format check completed successfully.") @@ -209,6 +210,41 @@ def __call__(self, *args, **kwargs): return self.run(*args, **kwargs) +# TODO: check for uniqueness of all primary keys + + +class CheckProblemConfig(ValidationTask): + """A task to validate the configuration of a PEtab problem. + + This corresponds to checking the problem YAML file semantics. + """ + + def run(self, problem: Problem) -> ValidationIssue | None: + if (config := problem.config) is None or config.base_path is None: + # This is allowed, so we can validate in-memory problems + # that don't have the list of files populated + return None + # TODO: decide when this should be emitted + # return ValidationWarning("Problem configuration is missing.") + + # TODO: we need some option for validating partial vs full problems + # check for unset but required files + missing_files = [] + if not config.parameter_file: + missing_files.append("parameters") + + if not [p.measurement_files for p in config.problems]: + missing_files.append("measurements") + + if not [p.observable_files for p in config.problems]: + missing_files.append("observables") + + if missing_files: + return ValidationError( + f"Missing files: {', '.join(missing_files)}" + ) + + class CheckModel(ValidationTask): """A task to validate the model of a PEtab problem.""" @@ -221,134 +257,133 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError("Model is invalid.") -class CheckTableExists(ValidationTask): - """A task to check if a table exists in the PEtab problem.""" - - def __init__(self, table_name: str): - if table_name not in ["measurement", "observable", "parameter"]: - # all others are optional - raise ValueError( - f"Table name {table_name} is not supported. " - "Supported table names are 'measurement', 'observable', " - "'parameter'." - ) - self.table_name = table_name +class CheckMeasuredObservablesDefined(ValidationTask): + """A task to check that all observables referenced by the measurements + are defined.""" def run(self, problem: Problem) -> ValidationIssue | None: - if getattr(problem, f"{self.table_name}_df") is None: - return ValidationError(f"{self.table_name} table is missing.") - + used_observables = { + m.observable_id for m in problem.measurement_table.measurements + } + defined_observables = { + o.id for o in problem.observables_table.observables + } + if undefined_observables := (used_observables - defined_observables): + return ValidationError( + f"Observables {undefined_observables} used in " + "measurement table but not defined in observables table." + ) -class CheckValidPetabIdColumn(ValidationTask): - """A task to check that a given column contains only valid PEtab IDs.""" - def __init__( - self, - table_name: str, - column_name: str, - required_column: bool = True, - ignore_nan: bool = False, - ): - self.table_name = table_name - self.column_name = column_name - self.required_column = required_column - self.ignore_nan = ignore_nan +class CheckOverridesMatchPlaceholders(ValidationTask): + """A task to check that the number of observable/noise parameters + in the measurements match the number of placeholders in the observables.""" def run(self, problem: Problem) -> ValidationIssue | None: - df = getattr(problem, f"{self.table_name}_df") - if df is None: - return - - if self.column_name not in df.columns: - if self.required_column: - return ValidationError( - f"Column {self.column_name} is missing in " - f"{self.table_name} table." + observable_parameters_count = { + o.id: len(o.observable_placeholders) + for o in problem.observables_table.observables + } + noise_parameters_count = { + o.id: len(o.noise_placeholders) + for o in problem.observables_table.observables + } + messages = [] + for m in problem.measurement_table.measurements: + # check observable parameters + try: + expected = observable_parameters_count[m.observable_id] + except KeyError: + messages.append( + f"Observable {m.observable_id} used in measurement " + f"table is not defined." ) - return + continue - try: - ids = df[self.column_name].values - if self.ignore_nan: - ids = ids[~pd.isna(ids)] - check_ids(ids, kind=self.column_name) - except ValueError as e: - return ValidationError(str(e)) + actual = len(m.observable_parameters) + if actual != expected: + formula = problem.observables_table[m.observable_id].formula + messages.append( + f"Mismatch of observable parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) -class CheckMeasurementTable(ValidationTask): - """A task to validate the measurement table of a PEtab problem.""" + # check noise parameters + expected = noise_parameters_count[m.observable_id] + actual = len(m.noise_parameters) + if actual != expected: + # no overrides defined, but a numerical sigma can be provided + # anyway + if len(m.noise_parameters) != 1 or ( + len(m.noise_parameters) == 1 + and m.noise_parameters[0].is_number + ): + messages.append( + "No placeholders have been specified in the " + f"noise model for observable {m.observable_id}, " + "but a parameter ID " + "or multiple overrides were specified in the " + "noiseParameters column." + ) + else: + formula = problem.observables_table[ + m.observable_id + ].noise_formula + messages.append( + f"Mismatch of noise parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.measurement_df is None: - return + if messages: + return ValidationError("\n".join(messages)) - df = problem.measurement_df - try: - _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") - for column_name in MEASUREMENT_DF_REQUIRED_COLS: - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) +class CheckPosLogMeasurements(ValidationTask): + """A task to check that measurements for observables with + log-transformation are positive.""" - for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df and not np.issubdtype( - df[column_name].dtype, np.number - ): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name + def run(self, problem: Problem) -> ValidationIssue | None: + from .core import ObservableTransformation as ot + + log_observables = { + o.id + for o in problem.observables_table.observables + if o.transformation in [ot.LOG, ot.LOG10] + } + if log_observables: + for m in problem.measurement_table.measurements: + if m.measurement <= 0 and m.observable_id in log_observables: + return ValidationError( + "Measurements with observable " + f"log transformation must be " + f"positive, but {m.measurement} <= 0 for {m}" ) - if problem.observable_df is not None: - assert_measured_observables_defined(df, problem.observable_df) - assert_overrides_match_parameter_count( - df, problem.observable_df - ) - if OBSERVABLE_TRANSFORMATION in problem.observable_df: - # Check for positivity of measurements in case of - # log-transformation - assert_unique_observable_ids(problem.observable_df) - # If the above is not checked, in the following loop - # trafo may become a pandas Series - for measurement, obs_id in zip( - df[MEASUREMENT], df[OBSERVABLE_ID], strict=True - ): - trafo = problem.observable_df.loc[ - obs_id, OBSERVABLE_TRANSFORMATION - ] - if measurement <= 0.0 and trafo in [LOG, LOG10]: - raise ValueError( - "Measurements with observable " - f"transformation {trafo} must be " - f"positive, but {measurement} <= 0." - ) - - assert_measurements_not_null(df) - assert_measurements_numeric(df) - except AssertionError as e: - return ValidationError(str(e)) +class CheckMeasuredExperimentsDefined(ValidationTask): + """A task to check that all experiments referenced by measurements + are defined.""" + def run(self, problem: Problem) -> ValidationIssue | None: # TODO: introduce some option for validation of partial vs full # problem. if this is supposed to be a complete problem, a missing # condition table should be an error if the measurement table refers # to conditions, otherwise it should maximally be a warning - used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values) - # handle default-experiment - used_experiments = set( - filter( - lambda x: not pd.isna(x), - used_experiments, - ) - ) + used_experiments = { + m.experiment_id + for m in problem.measurement_table.measurements + if m.experiment_id is not None + } + # check that measured experiments exist - available_experiments = ( - set(problem.experiment_df[EXPERIMENT_ID].unique()) - if problem.experiment_df is not None - else set() - ) + available_experiments = { + e.id for e in problem.experiments_table.experiments + } if missing_experiments := (used_experiments - available_experiments): return ValidationError( "Measurement table references experiments that " @@ -362,7 +397,7 @@ class CheckConditionTable(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: if problem.condition_df is None: - return + return None df = problem.condition_df @@ -383,13 +418,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: problem.model.get_valid_ids_for_condition_table() ) if problem.observable_df is not None: - allowed_targets |= set( - get_output_parameters( - model=problem.model, - observable_df=problem.observable_df, - mapping_df=problem.mapping_df, - ) - ) + allowed_targets |= set(get_output_parameters(problem)) if problem.mapping_df is not None: allowed_targets |= set(problem.mapping_df.index.values) invalid = set(df[TARGET_ID].unique()) - allowed_targets @@ -561,12 +590,12 @@ def run(self, problem: Problem) -> ValidationIssue | None: or problem.observable_df is None or problem.measurement_df is None ): - return + return None required = get_required_parameters_for_parameter_table(problem) allowed = get_valid_parameters_for_parameter_table(problem) - actual = set(problem.parameter_df.index) + actual = {p.id for p in problem.parameter_table.parameters} missing = required - actual extraneous = actual - allowed @@ -694,14 +723,9 @@ def get_valid_parameters_for_parameter_table( problem: Problem, ) -> set[str]: """ - Get set of parameters which may be present inside the parameter table + Get the set of parameters which may be present inside the parameter table - Arguments: - model: PEtab model - condition_df: PEtab condition table - observable_df: PEtab observable table - measurement_df: PEtab measurement table - mapping_df: PEtab mapping table for additional checks + :param problem: The PEtab problem Returns: Set of parameter IDs which PEtab allows to be present in the @@ -715,71 +739,49 @@ def get_valid_parameters_for_parameter_table( # - remove parameters for which condition table columns exist # - remove placeholder parameters # (only partial overrides are not supported) - model = problem.model - condition_df = problem.condition_df - observable_df = problem.observable_df - measurement_df = problem.measurement_df - mapping_df = problem.mapping_df - # must not go into parameter table - blackset = set() + blackset = set(get_placeholders(problem)) - if observable_df is not None: - placeholders = set(get_placeholders(observable_df)) - - # collect assignment targets - blackset |= placeholders - - if condition_df is not None: - blackset |= set(condition_df.columns.values) + # condition table targets + blackset |= { + change.target_id + for cond in problem.conditions_table.conditions + for change in cond.changes + } # don't use sets here, to have deterministic ordering, # e.g. for creating parameter tables parameter_ids = OrderedDict.fromkeys( p - for p in model.get_valid_parameters_for_parameter_table() + for p in problem.model.get_valid_parameters_for_parameter_table() if p not in blackset ) - if mapping_df is not None: - for from_id, to_id in mapping_df[MODEL_ENTITY_ID].items(): - if to_id in parameter_ids.keys(): - parameter_ids[from_id] = None + for mapping in problem.mapping_table.mappings: + if mapping.model_id and mapping.model_id in parameter_ids.keys(): + parameter_ids[mapping.petab_id] = None - if observable_df is not None: - # add output parameters from observables table - output_parameters = get_output_parameters( - observable_df=observable_df, model=model - ) - for p in output_parameters: - if p not in blackset: - parameter_ids[p] = None + # add output parameters from observables table + output_parameters = get_output_parameters(problem) + for p in output_parameters: + if p not in blackset: + parameter_ids[p] = None # Append parameters from measurement table, unless they occur as condition # table columns def append_overrides(overrides): for p in overrides: - if isinstance(p, str) and p not in blackset: - parameter_ids[p] = None - - if measurement_df is not None: - for _, row in measurement_df.iterrows(): - # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list( - row.get(NOISE_PARAMETERS, None) - ) - ) + if isinstance(p, sp.Symbol) and (str_p := str(p)) not in blackset: + parameter_ids[str_p] = None + + for measurement in problem.measurement_table.measurements: + # we trust that the number of overrides matches + append_overrides(measurement.observable_parameters) + append_overrides(measurement.noise_parameters) # Append parameter overrides from condition table - if condition_df is not None: - for p in v2.conditions.get_condition_table_free_symbols(problem): - parameter_ids[str(p)] = None + for p in v2.conditions.get_condition_table_free_symbols(problem): + parameter_ids[str(p)] = None return set(parameter_ids.keys()) @@ -799,34 +801,30 @@ def get_required_parameters_for_parameter_table( that are not defined in the model. """ parameter_ids = set() + condition_targets = { + change.target_id + for cond in problem.conditions_table.conditions + for change in cond.changes + } # Add parameters from measurement table, unless they are fixed parameters def append_overrides(overrides): parameter_ids.update( - p + str_p for p in overrides - if isinstance(p, str) - and ( - problem.condition_df is None - or p not in problem.condition_df[TARGET_ID] - ) + if isinstance(p, sp.Symbol) + and (str_p := str(p)) not in condition_targets ) - for _, row in problem.measurement_df.iterrows(): + for m in problem.measurement_table.measurements: # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list(row.get(NOISE_PARAMETERS, None)) - ) + append_overrides(m.observable_parameters) + append_overrides(m.noise_parameters) - # remove `observable_ids` when - # `get_output_parameters` is updated for PEtab v2/v1.1, where - # observable IDs are allowed in observable formulae - observable_ids = set(problem.observable_df.index) + # TODO remove `observable_ids` when + # `get_output_parameters` is updated for PEtab v2/v1.1, where + # observable IDs are allowed in observable formulae + observable_ids = {o.id for o in problem.observables_table.observables} # Add output parameters except for placeholders for formula_type, placeholder_sources in ( @@ -844,13 +842,11 @@ def append_overrides(overrides): ), ): output_parameters = get_output_parameters( - problem.observable_df, - problem.model, - mapping_df=problem.mapping_df, + problem, **formula_type, ) placeholders = get_placeholders( - problem.observable_df, + problem, **placeholder_sources, ) parameter_ids.update( @@ -868,25 +864,106 @@ def append_overrides(overrides): ) # parameters that are overridden via the condition table are not allowed - if problem.condition_df is not None: - parameter_ids -= set(problem.condition_df[TARGET_ID].unique()) + parameter_ids -= condition_targets return parameter_ids +def get_output_parameters( + problem: Problem, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get output parameters + + Returns IDs of parameters used in observable and noise formulas that are + not defined in the model. + + Arguments: + problem: The PEtab problem + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of output parameter IDs + """ + formulas = [] + if observables: + formulas.extend( + o.formula for o in problem.observables_table.observables + ) + if noise: + formulas.extend( + o.noise_formula for o in problem.observables_table.observables + ) + output_parameters = OrderedDict() + + for formula in formulas: + free_syms = sorted( + formula.free_symbols, + key=lambda symbol: symbol.name, + ) + for free_sym in free_syms: + sym = str(free_sym) + if problem.model.symbol_allowed_in_observable_formula(sym): + continue + + # does it map to a model entity? + + if ( + (mapped := problem.mapping_table.get(sym)) is not None + and mapped.model_id is not None + and problem.model.symbol_allowed_in_observable_formula( + mapped.model_id + ) + ): + continue + + output_parameters[sym] = None + + return list(output_parameters.keys()) + + +def get_placeholders( + problem: Problem, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get all placeholder parameters from observable table observableFormulas + and noiseFormulas. + + Arguments: + problem: The PEtab problem + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of placeholder parameters from observable table observableFormulas + and noiseFormulas. + """ + # collect placeholder parameters overwritten by + # {observable,noise}Parameters + placeholders = [] + for o in problem.observables_table.observables: + if observables: + placeholders.extend(map(str, o.observable_placeholders)) + if noise: + placeholders.extend(map(str, o.noise_placeholders)) + + from ..v1.core import unique_preserve_order + + return unique_preserve_order(placeholders) + + #: Validation tasks that should be run on any PEtab problem default_validation_tasks = [ - CheckTableExists("measurement"), - CheckTableExists("observable"), - CheckTableExists("parameter"), + CheckProblemConfig(), CheckModel(), - CheckMeasurementTable(), + CheckPosLogMeasurements(), + CheckMeasuredObservablesDefined(), + CheckOverridesMatchPlaceholders(), CheckConditionTable(), CheckExperimentTable(), - CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True), - CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), - # TODO: NAN allowed? - CheckValidPetabIdColumn("experiment", CONDITION_ID, ignore_nan=True), CheckExperimentConditionsExist(), CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 102097c1..20e54aa9 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -226,11 +226,19 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: validation_issues = v2.lint_problem(new_yaml_file) if validation_issues: - validation_issues = "\n".join(map(str, validation_issues)) - raise ValueError( - "The generated PEtab v2 problem did not pass linting: " - f"{validation_issues}" + sev = v2.lint.ValidationIssueSeverity + validation_issues.log(max_level=sev.WARNING) + errors = "\n".join( + map( + str, + (i for i in validation_issues if i.level > sev.WARNING), + ) ) + if errors: + raise ValueError( + "The generated PEtab v2 problem did not pass linting: " + f"{errors}" + ) def _update_yaml(yaml_config: dict) -> dict: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index bbf28fd9..3b331996 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -76,7 +76,7 @@ def __init__( experiments_table: core.ExperimentsTable = None, observables_table: core.ObservablesTable = None, measurement_table: core.MeasurementTable = None, - parameters_table: core.ParametersTable = None, + parameters_table: core.ParameterTable = None, mapping_table: core.MappingTable = None, visualization_df: pd.DataFrame = None, extensions_config: dict = None, @@ -84,12 +84,12 @@ def __init__( ): from ..v2.lint import default_validation_tasks + self.config = config self.model: Model | None = model self.extensions_config = extensions_config or {} self.validation_tasks: list[ValidationTask] = ( default_validation_tasks.copy() ) - self.config = config self.observables_table = observables_table or core.ObservablesTable( observables=[] @@ -109,45 +109,24 @@ def __init__( ) self.visualization_df = visualization_df - self.config = config - self.extensions_config = extensions_config def __str__(self): model = f"with model ({self.model})" if self.model else "without model" - experiments = ( - f"{self.experiment_df.shape[0]} experiments" - if self.experiment_df is not None - else "without experiments table" - ) + ne = len(self.experiments_table.experiments) + experiments = f"{ne} experiments" - conditions = ( - f"{self.condition_df.shape[0]} conditions" - if self.condition_df is not None - else "without conditions table" - ) + nc = len(self.conditions_table.conditions) + conditions = f"{nc} conditions" - observables = ( - f"{self.observable_df.shape[0]} observables" - if self.observable_df is not None - else "without observables table" - ) + no = len(self.observables_table.observables) + observables = f"{no} observables" - measurements = ( - f"{self.measurement_df.shape[0]} measurements" - if self.measurement_df is not None - else "without measurements table" - ) + nm = len(self.measurement_table.measurements) + measurements = f"{nm} measurements" - if self.parameter_df is not None: - num_estimated_parameters = ( - sum(self.parameter_df[ESTIMATE] == 1) - if ESTIMATE in self.parameter_df - else self.parameter_df.shape[0] - ) - parameters = f"{num_estimated_parameters} estimated parameters" - else: - parameters = "without parameter_df table" + nest = self.parameter_table.n_estimated + parameters = f"{nest} estimated parameters" return ( f"PEtab Problem {model}, {conditions}, {experiments}, " diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index a08af22c..d4eab006 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -12,6 +12,7 @@ ESTIMATE, LOWER_BOUND, MODEL_ENTITY_ID, + NAME, NOISE_FORMULA, NOMINAL_VALUE, OBSERVABLE_FORMULA, @@ -163,6 +164,7 @@ def test_modify_problem(): data={ PETAB_ENTITY_ID: ["new_petab_id"], MODEL_ENTITY_ID: ["some_model_entity_id"], + NAME: [None], } ).set_index([PETAB_ENTITY_ID]) assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) From a3b1f6656e3008818f56a350f3e155f34ee7fd3a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 22 Mar 2025 15:39:40 +0100 Subject: [PATCH 07/19] .. --- petab/v2/core.py | 41 +++++++ petab/v2/lint.py | 300 ++++++++++++++++++++--------------------------- 2 files changed, 170 insertions(+), 171 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 05825b73..c9397f84 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -21,6 +21,7 @@ field_validator, model_validator, ) +from typing_extensions import Self from ..v1.lint import is_valid_identifier from ..v1.math import sympify_petab @@ -872,6 +873,26 @@ def _validate_id(cls, v): raise ValueError(f"Invalid ID: {v}") return v + @field_validator("estimate", mode="before") + @classmethod + def _validate_estimate_before(cls, v): + if isinstance(v, bool): + return v + + if isinstance(v, int) or isinstance(v, float) and v.is_integer(): + if v == 0: + return False + if v == 1: + return True + + if isinstance(v, str): + if v == "0": + return False + if v == "1": + return True + + raise ValueError(f"Invalid value for estimate: {v}. Must be 0 or 1.") + @field_validator("lb", "ub", "nominal_value") @classmethod def _convert_nan_to_none(cls, v): @@ -879,6 +900,26 @@ def _convert_nan_to_none(cls, v): return None return v + @model_validator(mode="after") + def _validate(self) -> Self: + if not self.estimate and self.nominal_value is None: + raise ValueError( + "Non-estimated parameter must have a nominal value" + ) + + if self.estimate and (self.lb is None or self.ub is None): + raise ValueError( + "Estimated parameter must have lower and upper bounds set" + ) + + if self.lb is not None and self.ub is not None and self.lb >= self.ub: + raise ValueError("Lower bound must be less than upper bound") + + # TODO parameterScale? + + # TODO priorType, priorParameters + return self + class ParameterTable(BaseModel): """PEtab parameter table.""" diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 75cf131c..29f96d8b 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -10,26 +10,10 @@ from enum import IntEnum from pathlib import Path -import numpy as np import pandas as pd import sympy as sp from .. import v2 -from ..v1.lint import ( - _check_df, - assert_model_parameters_in_condition_or_parameter_table, - assert_no_leading_trailing_whitespace, - assert_parameter_bounds_are_numeric, - assert_parameter_estimate_is_boolean, - assert_parameter_id_is_string, - assert_parameter_prior_parameters_are_valid, - assert_parameter_prior_type_is_valid, - assert_parameter_scale_is_valid, - assert_unique_parameter_ids, - check_ids, - check_observable_df, - check_parameter_bounds, -) from ..v1.visualize.lint import validate_visualization_df from ..v2.C import * from .problem import Problem @@ -45,9 +29,8 @@ "CheckModel", "CheckProblemConfig", "CheckPosLogMeasurements", - "CheckConditionTable", - "CheckObservableTable", - "CheckParameterTable", + "CheckValidConditionTargets", + "CheckUniquePrimaryKeys", "CheckExperimentTable", "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", @@ -210,9 +193,6 @@ def __call__(self, *args, **kwargs): return self.run(*args, **kwargs) -# TODO: check for uniqueness of all primary keys - - class CheckProblemConfig(ValidationTask): """A task to validate the configuration of a PEtab problem. @@ -392,72 +372,82 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -class CheckConditionTable(ValidationTask): - """A task to validate the condition table of a PEtab problem.""" +class CheckValidConditionTargets(ValidationTask): + """Check that all condition table targets are valid.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.condition_df is None: - return None - - df = problem.condition_df - - try: - _check_df(df, CONDITION_DF_REQUIRED_COLS, "condition") - check_ids(df[CONDITION_ID], kind="condition") - check_ids(df[TARGET_ID], kind="target") - except AssertionError as e: - return ValidationError(str(e)) - - # TODO: check value types - - if problem.model is None: - return - - # check targets are valid allowed_targets = set( problem.model.get_valid_ids_for_condition_table() ) - if problem.observable_df is not None: - allowed_targets |= set(get_output_parameters(problem)) - if problem.mapping_df is not None: - allowed_targets |= set(problem.mapping_df.index.values) - invalid = set(df[TARGET_ID].unique()) - allowed_targets - if invalid: + allowed_targets |= set(get_output_parameters(problem)) + allowed_targets |= { + m.petab_id + for m in problem.mapping_table.mappings + if m.model_id is not None + } + + used_targets = { + change.target_id + for cond in problem.conditions_table.conditions + for change in cond.changes + } + + if invalid := (used_targets - allowed_targets): return ValidationError( f"Condition table contains invalid targets: {invalid}" ) - # TODO check that all value types are valid for the given targets +class CheckUniquePrimaryKeys(ValidationTask): + """Check that all primary keys are unique.""" -class CheckObservableTable(ValidationTask): - """A task to validate the observable table of a PEtab problem.""" + def run(self, problem: Problem) -> ValidationIssue | None: + # check for uniqueness of all primary keys + counter = Counter(c.id for c in problem.conditions_table.conditions) + duplicates = {id for id, count in counter.items() if count > 1} - def run(self, problem: Problem): - if problem.observable_df is None: - return + if duplicates: + return ValidationError( + f"Condition table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(o.id for o in problem.observables_table.observables) + duplicates = {id for id, count in counter.items() if count > 1} - try: - check_observable_df( - problem.observable_df, + if duplicates: + return ValidationError( + f"Observable table contains duplicate IDs: {duplicates}" ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + + counter = Counter(e.id for e in problem.experiments_table.experiments) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Experiment table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(p.id for p in problem.parameter_table.parameters) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Parameter table contains duplicate IDs: {duplicates}" ) class CheckObservablesDoNotShadowModelEntities(ValidationTask): """A task to check that observable IDs do not shadow model entities.""" + # TODO: all PEtab entity IDs must be disjoint from the model entity IDs def run(self, problem: Problem) -> ValidationIssue | None: - if problem.observable_df is None or problem.model is None: - return + if not problem.observables_table.observables or problem.model is None: + return None shadowed_entities = [ - obs_id - for obs_id in problem.observable_df.index - if problem.model.has_entity_with_id(obs_id) + o.id + for o in problem.observables_table.observables + if problem.model.has_entity_with_id(o.id) ] if shadowed_entities: return ValidationError( @@ -465,76 +455,6 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -class CheckParameterTable(ValidationTask): - """A task to validate the parameter table of a PEtab problem.""" - - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.parameter_df is None: - return - - try: - df = problem.parameter_df - _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter") - - if df.index.name != PARAMETER_ID: - return ValidationError( - f"Parameter table has wrong index {df.index.name}." - f" Expected {PARAMETER_ID}.", - ) - - check_ids(df.index.values, kind="parameter") - - for column_name in PARAMETER_DF_REQUIRED_COLS[ - 1: - ]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - # nominal value is required for non-estimated parameters - non_estimated_par_ids = list( - df.index[ - (df[ESTIMATE] != 1) - | ( - pd.api.types.is_string_dtype(df[ESTIMATE]) - and df[ESTIMATE] != "1" - ) - ] - ) - # TODO implement as validators - # `assert_has_fixed_parameter_nominal_values` - # and `assert_correct_table_dtypes` - if non_estimated_par_ids: - if NOMINAL_VALUE not in df: - return ValidationError( - "Parameter table contains parameters " - f"{non_estimated_par_ids} that are not " - "specified to be estimated, " - f"but column {NOMINAL_VALUE} is missing." - ) - try: - df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float) - except ValueError: - return ValidationError( - f"Expected numeric values for `{NOMINAL_VALUE}` " - "in parameter table " - "for all non-estimated parameters." - ) - - assert_parameter_id_is_string(df) - assert_parameter_scale_is_valid(df) - assert_parameter_bounds_are_numeric(df) - assert_parameter_estimate_is_boolean(df) - assert_unique_parameter_ids(df) - check_parameter_bounds(df) - assert_parameter_prior_type_is_valid(df) - assert_parameter_prior_parameters_are_valid(df) - - except AssertionError as e: - return ValidationError(str(e)) - - class CheckExperimentTable(ValidationTask): """A task to validate the experiment table of a PEtab problem.""" @@ -584,12 +504,7 @@ class CheckAllParametersPresentInParameterTable(ValidationTask): with no additional ones.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.parameter_df is None - or problem.observable_df is None - or problem.measurement_df is None - ): + if problem.model is None: return None required = get_required_parameters_for_parameter_table(problem) @@ -601,17 +516,13 @@ def run(self, problem: Problem) -> ValidationIssue | None: # missing parameters might be present under a different name based on # the mapping table - if missing and problem.mapping_df is not None: + if missing: model_to_petab_mapping = {} - for map_from, map_to in zip( - problem.mapping_df.index.values, - problem.mapping_df[MODEL_ENTITY_ID], - strict=True, - ): - if map_to in model_to_petab_mapping: - model_to_petab_mapping[map_to].append(map_from) + for m in problem.mapping_table.mappings: + if m.model_id in model_to_petab_mapping: + model_to_petab_mapping[m.model_id].append(m.petab_id) else: - model_to_petab_mapping[map_to] = [map_from] + model_to_petab_mapping[m.model_id] = [m.petab_id] missing = { missing_id for missing_id in missing @@ -640,23 +551,71 @@ class CheckValidParameterInConditionOrParameterTable(ValidationTask): present in the condition or parameter table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.condition_df is None - or problem.parameter_df is None - ): - return + if problem.model is None: + return None - try: - assert_model_parameters_in_condition_or_parameter_table( - problem.model, - problem.condition_df, - problem.parameter_df, - problem.mapping_df, + allowed_in_condition_cols = set( + problem.model.get_valid_ids_for_condition_table() + ) + allowed_in_condition_cols |= { + m.petab_id + for m in problem.mapping_table.mappings + if not pd.isna(m.model_id) + and ( + # mapping table entities mapping to already allowed parameters + m.model_id in allowed_in_condition_cols + # mapping table entities mapping to species + or problem.model.is_state_variable(m.model_id) ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + } + + allowed_in_parameter_table = get_valid_parameters_for_parameter_table( + problem + ) + + entities_in_condition_table = { + change.target_id + for cond in problem.conditions_table.conditions + for change in cond.changes + } + entities_in_parameter_table = { + p.id for p in problem.parameter_table.parameters + } + + disallowed_in_condition = { + x + for x in (entities_in_condition_table - allowed_in_condition_cols) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + if disallowed_in_condition: + is_or_are = "is" if len(disallowed_in_condition) == 1 else "are" + return ValidationError( + f"{disallowed_in_condition} {is_or_are} not " + "allowed to occur in condition table " + "columns." + ) + + disallowed_in_parameters = { + x + for x in (entities_in_parameter_table - allowed_in_parameter_table) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + + if disallowed_in_parameters: + is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are" + return ValidationError( + f"{disallowed_in_parameters} {is_or_are} not " + "allowed to occur in the parameters table." + ) + + in_both = entities_in_condition_table & entities_in_parameter_table + if in_both: + is_or_are = "is" if len(in_both) == 1 else "are" + return ValidationError( + f"{in_both} {is_or_are} present in both " + "the condition table and the parameter table." ) @@ -959,19 +918,18 @@ def get_placeholders( default_validation_tasks = [ CheckProblemConfig(), CheckModel(), - CheckPosLogMeasurements(), + CheckUniquePrimaryKeys(), CheckMeasuredObservablesDefined(), + CheckPosLogMeasurements(), CheckOverridesMatchPlaceholders(), - CheckConditionTable(), + CheckValidConditionTargets(), CheckExperimentTable(), CheckExperimentConditionsExist(), - CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), - CheckParameterTable(), CheckAllParametersPresentInParameterTable(), - # TODO: atomize checks, update to long condition table, re-enable - # CheckVisualizationTable(), CheckValidParameterInConditionOrParameterTable(), CheckUnusedExperiments(), CheckUnusedConditions(), + # TODO: atomize checks, update to long condition table, re-enable + # CheckVisualizationTable(), ] From f700fd97650b1cfb9877e53aea05978ac205c94f Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 22 Mar 2025 16:15:46 +0100 Subject: [PATCH 08/19] .. --- petab/v2/conditions.py | 2 +- petab/v2/core.py | 83 +++++++++++++----------------------------- petab/v2/lint.py | 24 ++++++------ 3 files changed, 40 insertions(+), 69 deletions(-) diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 90724d04..124f8f2b 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -9,8 +9,8 @@ import sympy as sp from .. import v2 +from ..v1.lint import assert_no_leading_trailing_whitespace from .C import * -from .lint import assert_no_leading_trailing_whitespace __all__ = [ "get_condition_df", diff --git a/petab/v2/core.py b/petab/v2/core.py index c9397f84..d0e76628 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -68,6 +68,15 @@ def _convert_nan_to_none(v): return v +def _valid_petab_id(v: str) -> str: + """Field validator for PEtab IDs.""" + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + class ObservableTransformation(str, Enum): """Observable transformation types. @@ -141,7 +150,9 @@ class Observable(BaseModel): """Observable definition.""" #: Observable ID. - id: str = Field(alias=C.OBSERVABLE_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.OBSERVABLE_ID + ) #: Observable name. name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) #: Observable formula. @@ -162,15 +173,6 @@ class Observable(BaseModel): arbitrary_types_allowed=True, populate_by_name=True ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - @field_validator( "name", "formula", @@ -313,9 +315,11 @@ class Change(BaseModel): """ #: The ID of the target entity to change. - target_id: str | None = Field(alias=C.TARGET_ID, default=None) + target_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.TARGET_ID + ) #: The value to set the target entity to. - target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) + target_value: sp.Basic = Field(alias=C.TARGET_VALUE) #: :meta private: model_config = ConfigDict( @@ -324,16 +328,6 @@ class Change(BaseModel): use_enum_values=True, ) - @model_validator(mode="before") - @classmethod - def _validate_id(cls, data: dict): - target_id = data.get("target_id", data.get(C.TARGET_ID)) - - if not is_valid_identifier(target_id): - raise ValueError(f"Invalid ID: {target_id}") - - return data - @field_validator("target_value", mode="before") @classmethod def _sympify(cls, v): @@ -366,22 +360,15 @@ class Condition(BaseModel): """ #: The condition ID. - id: str = Field(alias=C.CONDITION_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.CONDITION_ID + ) #: The changes associated with this condition. changes: list[Change] #: :meta private: model_config = ConfigDict(populate_by_name=True) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - def __add__(self, other: Change) -> Condition: """Add a change to the set.""" if not isinstance(other, Change): @@ -488,8 +475,6 @@ class ExperimentPeriod(BaseModel): def _validate_id(cls, condition_id): if pd.isna(condition_id) or not condition_id: return None - # if not condition_id: - # raise ValueError("ID must not be empty.") if not is_valid_identifier(condition_id): raise ValueError(f"Invalid ID: {condition_id}") return condition_id @@ -504,7 +489,9 @@ class Experiment(BaseModel): """ #: The experiment ID. - id: str = Field(alias=C.EXPERIMENT_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.EXPERIMENT_ID + ) #: The periods of the experiment. periods: list[ExperimentPeriod] = [] @@ -513,15 +500,6 @@ class Experiment(BaseModel): arbitrary_types_allowed=True, populate_by_name=True ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - def __add__(self, other: ExperimentPeriod) -> Experiment: """Add a period to the experiment.""" if not isinstance(other, ExperimentPeriod): @@ -747,7 +725,9 @@ class Mapping(BaseModel): """Mapping PEtab entities to model entities.""" #: PEtab entity ID. - petab_id: str = Field(alias=C.PETAB_ENTITY_ID) + petab_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.PETAB_ENTITY_ID + ) #: Model entity ID. model_id: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = ( Field(alias=C.MODEL_ENTITY_ID, default=None) @@ -760,17 +740,6 @@ class Mapping(BaseModel): #: :meta private: model_config = ConfigDict(populate_by_name=True) - @field_validator( - "petab_id", - ) - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - class MappingTable(BaseModel): """PEtab mapping table.""" @@ -913,7 +882,7 @@ def _validate(self) -> Self: ) if self.lb is not None and self.ub is not None and self.lb >= self.ub: - raise ValueError("Lower bound must be less than upper bound") + raise ValueError("Lower bound must be less than upper bound.") # TODO parameterScale? diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 29f96d8b..e9ef6f3d 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -14,8 +14,6 @@ import sympy as sp from .. import v2 -from ..v1.visualize.lint import validate_visualization_df -from ..v2.C import * from .problem import Problem logger = logging.getLogger(__name__) @@ -165,7 +163,7 @@ def lint_problem(problem: Problem | str | Path) -> ValidationResultList: Arguments: problem: PEtab problem to check. Instance of :class:`Problem` or path - to a PEtab problem yaml file. + to a PEtab problem YAML file. Returns: A list of validation results. Empty if no issues were found. """ @@ -324,7 +322,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: class CheckPosLogMeasurements(ValidationTask): - """A task to check that measurements for observables with + """Check that measurements for observables with log-transformation are positive.""" def run(self, problem: Problem) -> ValidationIssue | None: @@ -669,7 +667,9 @@ class CheckVisualizationTable(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: if problem.visualization_df is None: - return + return None + + from ..v1.visualize.lint import validate_visualization_df if validate_visualization_df(problem): return ValidationIssue( @@ -698,22 +698,23 @@ def get_valid_parameters_for_parameter_table( # - remove parameters for which condition table columns exist # - remove placeholder parameters # (only partial overrides are not supported) + # must not go into parameter table - blackset = set(get_placeholders(problem)) + invalid = set(get_placeholders(problem)) # condition table targets - blackset |= { + invalid |= { change.target_id for cond in problem.conditions_table.conditions for change in cond.changes } # don't use sets here, to have deterministic ordering, - # e.g. for creating parameter tables + # e.g., for creating parameter tables parameter_ids = OrderedDict.fromkeys( p for p in problem.model.get_valid_parameters_for_parameter_table() - if p not in blackset + if p not in invalid ) for mapping in problem.mapping_table.mappings: @@ -723,14 +724,14 @@ def get_valid_parameters_for_parameter_table( # add output parameters from observables table output_parameters = get_output_parameters(problem) for p in output_parameters: - if p not in blackset: + if p not in invalid: parameter_ids[p] = None # Append parameters from measurement table, unless they occur as condition # table columns def append_overrides(overrides): for p in overrides: - if isinstance(p, sp.Symbol) and (str_p := str(p)) not in blackset: + if isinstance(p, sp.Symbol) and (str_p := str(p)) not in invalid: parameter_ids[str_p] = None for measurement in problem.measurement_table.measurements: @@ -932,4 +933,5 @@ def get_placeholders( CheckUnusedConditions(), # TODO: atomize checks, update to long condition table, re-enable # CheckVisualizationTable(), + # TODO validate mapping table ] From 164908095b64631562cfcb437478fbc64425bbaa Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 08:59:10 +0100 Subject: [PATCH 09/19] .. --- petab/v2/__init__.py | 34 ++++++------- petab/v2/petab1to2.py | 13 +++-- petab/v2/problem.py | 110 ++++++++++++++++++++++-------------------- pytest.ini | 2 - 4 files changed, 83 insertions(+), 76 deletions(-) diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 4d147828..490c9fe2 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -5,27 +5,27 @@ from warnings import warn -# TODO: remove v1 star imports -from ..v1.calculate import * # noqa: F403, F401, E402 -from ..v1.composite_problem import * # noqa: F403, F401, E402 -from ..v1.core import * # noqa: F403, F401, E402 -from ..v1.format_version import __format_version__ # noqa: F401, E402 -from ..v1.mapping import * # noqa: F403, F401, E402 -from ..v1.measurements import * # noqa: F403, F401, E402 -from ..v1.observables import * # noqa: F403, F401, E402 -from ..v1.parameter_mapping import * # noqa: F403, F401, E402 -from ..v1.parameters import * # noqa: F403, F401, E402 -from ..v1.sampling import * # noqa: F403, F401, E402 -from ..v1.sbml import * # noqa: F403, F401, E402 -from ..v1.simulate import * # noqa: F403, F401, E402 -from ..v1.yaml import * # noqa: F403, F401, E402 - warn( "Support for PEtab2.0 and all of petab.v2 is experimental " "and subject to changes!", stacklevel=1, ) +# TODO: move this module to v2 +from petab.v1.mapping import ( # noqa: F403, F401, E402 + get_mapping_df, + write_mapping_df, +) +from petab.v1.measurements import ( # noqa: F401, E402 + get_measurement_df, + write_measurement_df, +) +from petab.v1.observables import ( # noqa: F401, E402 + get_observable_df, + write_observable_df, +) +from petab.v1.yaml import load_yaml # noqa: F401, E402 + # import after v1 from ..version import __version__ # noqa: F401, E402 from . import ( # noqa: F401, E402 @@ -38,5 +38,5 @@ write_experiment_df, ) from .lint import lint_problem # noqa: F401, E402 -from .models import Model # noqa: F401, E402 -from .problem import Problem # noqa: F401, E402 +from .models import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, Model # noqa: F401, E402 +from .problem import Problem, ProblemConfig # noqa: F401, E402 diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 20e54aa9..93471694 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,5 +1,7 @@ """Convert PEtab version 1 problems to version 2.""" +from __future__ import annotations + import shutil from contextlib import suppress from itertools import chain @@ -14,12 +16,13 @@ from ..v1.yaml import get_path_prefix, load_yaml, validate from ..versions import get_major_version from .models import MODEL_TYPE_SBML -from .problem import ProblemConfig __all__ = ["petab1to2"] -def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): +def petab1to2( + yaml_config: Path | str, output_dir: Path | str = None +) -> v2.Problem | None: """Convert from PEtab 1.0 to PEtab 2.0 format. Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format. @@ -56,7 +59,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): get_dest_path = lambda filename: f"{output_dir}/{filename}" # noqa: E731 - # Validate original PEtab problem + # Validate the original PEtab problem validate(yaml_config, path_prefix=path_prefix) if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") @@ -72,7 +75,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): # Update YAML file new_yaml_config = _update_yaml(yaml_config) - new_yaml_config = ProblemConfig(**new_yaml_config) + new_yaml_config = v2.ProblemConfig(**new_yaml_config) # Update tables # condition tables, observable tables, SBML files, parameter table: @@ -218,7 +221,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: measurement_df, get_dest_path(measurement_file) ) - # Write new YAML file + # Write the new YAML file new_yaml_file = output_dir / Path(yaml_file).name new_yaml_config.to_yaml(new_yaml_file) diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 3b331996..ec13ba88 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -36,7 +36,7 @@ from ..v2.lint import ValidationResultList, ValidationTask -__all__ = ["Problem"] +__all__ = ["Problem", "ProblemConfig"] class Problem: @@ -56,17 +56,6 @@ class Problem: Optionally, it may contain visualization tables. See also :doc:`petab:v2/documentation_data_format`. - - Parameters: - condition_df: PEtab condition table - experiment_df: PEtab experiment table - measurement_df: PEtab measurement table - parameter_df: PEtab parameter table - observable_df: PEtab observable table - visualization_df: PEtab visualization table - mapping_df: PEtab mapping table - model: The underlying model - extensions_config: Information on the extensions used """ def __init__( @@ -79,14 +68,12 @@ def __init__( parameters_table: core.ParameterTable = None, mapping_table: core.MappingTable = None, visualization_df: pd.DataFrame = None, - extensions_config: dict = None, config: ProblemConfig = None, ): from ..v2.lint import default_validation_tasks self.config = config self.model: Model | None = model - self.extensions_config = extensions_config or {} self.validation_tasks: list[ValidationTask] = ( default_validation_tasks.copy() ) @@ -310,7 +297,7 @@ def get_path(filename): model=model, visualization_df=visualization_df, mapping_df=mapping_df, - extensions_config=config.extensions, + config=config, ) @staticmethod @@ -323,7 +310,6 @@ def from_dfs( visualization_df: pd.DataFrame = None, observable_df: pd.DataFrame = None, mapping_df: pd.DataFrame = None, - extensions_config: dict = None, config: ProblemConfig = None, ): """ @@ -338,7 +324,7 @@ def from_dfs( visualization_df: PEtab visualization table mapping_df: PEtab mapping table model: The underlying model - extensions_config: Information on the extensions used + config: The PEtab problem configuration """ observables_table = core.ObservablesTable.from_df(observable_df) @@ -357,7 +343,6 @@ def from_dfs( parameters_table=parameter_table, mapping_table=mapping_table, visualization_df=visualization_df, - extensions_config=extensions_config, config=config, ) @@ -419,6 +404,8 @@ def get_problem(problem: str | Path | Problem) -> Problem: @property def condition_df(self) -> pd.DataFrame | None: + """Conditions table as DataFrame.""" + # TODO: return empty df? return self.conditions_table.to_df() if self.conditions_table else None @condition_df.setter @@ -427,6 +414,7 @@ def condition_df(self, value: pd.DataFrame): @property def experiment_df(self) -> pd.DataFrame | None: + """Experiments table as DataFrame.""" return ( self.experiments_table.to_df() if self.experiments_table else None ) @@ -437,6 +425,7 @@ def experiment_df(self, value: pd.DataFrame): @property def measurement_df(self) -> pd.DataFrame | None: + """Measurements table as DataFrame.""" return ( self.measurement_table.to_df() if self.measurement_table else None ) @@ -447,6 +436,7 @@ def measurement_df(self, value: pd.DataFrame): @property def parameter_df(self) -> pd.DataFrame | None: + """Parameter table as DataFrame.""" return self.parameter_table.to_df() if self.parameter_table else None @parameter_df.setter @@ -455,6 +445,7 @@ def parameter_df(self, value: pd.DataFrame): @property def observable_df(self) -> pd.DataFrame | None: + """Observables table as DataFrame.""" return ( self.observables_table.to_df() if self.observables_table else None ) @@ -465,6 +456,7 @@ def observable_df(self, value: pd.DataFrame): @property def mapping_df(self) -> pd.DataFrame | None: + """Mapping table as DataFrame.""" return self.mapping_table.to_df() if self.mapping_table else None @mapping_df.setter @@ -473,11 +465,16 @@ def mapping_df(self, value: pd.DataFrame): def get_optimization_parameters(self) -> list[str]: """ - Return list of optimization parameter IDs. + Get list of optimization parameter IDs from parameter table. - See :py:func:`petab.parameters.get_optimization_parameters`. + Arguments: + parameter_df: PEtab parameter DataFrame + + Returns: + List of IDs of parameters selected for optimization + (i.e. those with estimate = True). """ - return parameters.get_optimization_parameters(self.parameter_df) + return [p.id for p in self.parameter_table.parameters if p.estimate] def get_optimization_parameter_scales(self) -> dict[str, str]: """ @@ -485,13 +482,14 @@ def get_optimization_parameter_scales(self) -> dict[str, str]: See :py:func:`petab.parameters.get_optimization_parameters`. """ + # TODO: to be removed in v2? return parameters.get_optimization_parameter_scaling(self.parameter_df) def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. """ - return list(self.observable_df.index) + return [o.id for o in self.observables_table.observables] def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): """Apply mask of only free or only fixed values. @@ -533,7 +531,7 @@ def get_x_ids(self, free: bool = True, fixed: bool = True): ------- The parameter IDs. """ - v = list(self.parameter_df.index.values) + v = [p.id for p in self.parameter_table.parameters] return self._apply_mask(v, free=free, fixed=fixed) @property @@ -553,7 +551,7 @@ def x_fixed_ids(self) -> list[str]: def get_x_nominal( self, free: bool = True, fixed: bool = True, scaled: bool = False - ): + ) -> list: """Generic function to get parameter nominal values. Parameters @@ -571,10 +569,10 @@ def get_x_nominal( ------- The parameter nominal values. """ - if NOMINAL_VALUE in self.parameter_df: - v = list(self.parameter_df[NOMINAL_VALUE]) - else: - v = [nan] * len(self.parameter_df) + v = [ + p.nominal_value if p.nominal_value is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( @@ -636,7 +634,10 @@ def get_lb( ------- The lower parameter bounds. """ - v = list(self.parameter_df[LOWER_BOUND]) + v = [ + p.lb if p.lb is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -673,7 +674,10 @@ def get_ub( ------- The upper parameter bounds. """ - v = list(self.parameter_df[UPPER_BOUND]) + v = [ + p.ub if p.ub is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -693,19 +697,22 @@ def ub_scaled(self) -> list: @property def x_free_indices(self) -> list[int]: """Parameter table estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val != 0] + return [ + i + for i, p in enumerate(self.parameter_table.parameters) + if p.estimate + ] @property def x_fixed_indices(self) -> list[int]: """Parameter table non-estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val == 0] - - def get_simulation_conditions_from_measurement_df(self) -> pd.DataFrame: - """See :func:`petab.get_simulation_conditions`.""" - return measurements.get_simulation_conditions(self.measurement_df) + return [ + i + for i, p in enumerate(self.parameter_table.parameters) + if not p.estimate + ] + # TODO remove in v2? def get_optimization_to_simulation_parameter_mapping(self, **kwargs): """ See @@ -750,6 +757,7 @@ def sample_parameter_startpoints_dict( ) ] + # TODO: remove in v2? def unscale_parameters( self, x_dict: dict[str, float], @@ -774,6 +782,7 @@ def unscale_parameters( for parameter_id, parameter_value in x_dict.items() } + # TODO: remove in v2? def scale_parameters( self, x_dict: dict[str, float], @@ -806,8 +815,9 @@ def n_estimated(self) -> int: @property def n_measurements(self) -> int: """Number of measurements.""" - return self.measurement_df[MEASUREMENT].notna().sum() + return len(self.measurement_table.measurements) + # TODO: update after implementing priors in `Parameter` @property def n_priors(self) -> int: """Number of priors.""" @@ -834,13 +844,14 @@ def validate( ) validation_results = ValidationResultList() - if self.extensions_config: + if self.config.extensions: + extensions = ",".join(e.name for e in self.config.extensions) validation_results.append( ValidationIssue( ValidationIssueSeverity.WARNING, "Validation of PEtab extensions is not yet implemented, " "but the given problem uses the following extensions: " - f"{'', ''.join(self.extensions_config.keys())}", + f"{extensions}", ) ) @@ -874,7 +885,8 @@ def add_condition( `target_id=(value_type, target_value)`. """ if not kwargs: - return + raise ValueError("Cannot add condition without any changes") + changes = [ core.Change(target_id=target_id, target_value=target_value) for target_id, target_value in kwargs.items() @@ -925,6 +937,7 @@ def add_observable( if transform is not None: record[OBSERVABLE_TRANSFORMATION] = transform record.update(kwargs) + self.observables_table += core.Observable(**record) def add_parameter( @@ -1029,22 +1042,15 @@ def add_measurement( ) ) - def add_mapping(self, petab_id: str, model_id: str): + def add_mapping(self, petab_id: str, model_id: str, name: str = None): """Add a mapping table entry to the problem. Arguments: petab_id: The new PEtab-compatible ID mapping to `model_id` model_id: The ID of some entity in the model """ - record = { - PETAB_ENTITY_ID: [petab_id], - MODEL_ENTITY_ID: [model_id], - } - tmp_df = pd.DataFrame(record).set_index([PETAB_ENTITY_ID]) - self.mapping_df = ( - pd.concat([self.mapping_df, tmp_df]) - if self.mapping_df is not None - else tmp_df + self.mapping_table.mappings.append( + core.Mapping(petab_id=petab_id, model_id=model_id, name=name) ) def add_experiment(self, id_: str, *args): diff --git a/pytest.ini b/pytest.ini index 52eea778..b5f0c04d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -11,5 +11,3 @@ filterwarnings = ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning - # TODO: until we have proper v2 support - ignore:The experiment table is not yet supported and will be ignored:UserWarning From adbd5b63e2030714d13092676a9f1169c78a6661 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 10:09:57 +0100 Subject: [PATCH 10/19] Faster ConditionsTable.from_df --- petab/v1/lint.py | 5 ++++- petab/v2/core.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/petab/v1/lint.py b/petab/v1/lint.py index acc1a523..e14289fb 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -53,6 +53,9 @@ "observable_table_has_nontrivial_noise_formula", ] +#: Regular expression pattern for valid PEtab IDs +_petab_id_pattern = re.compile(r"^[a-zA-Z_]\w*$") + def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: """Check if given columns are present in DataFrame @@ -1189,7 +1192,7 @@ def is_valid_identifier(x: str) -> bool: if pd.isna(x): return False - return re.match(r"^[a-zA-Z_]\w*$", x) is not None + return _petab_id_pattern.match(x) is not None def check_ids(ids: Iterable[str], kind: str = "") -> None: diff --git a/petab/v2/core.py b/petab/v2/core.py index d0e76628..7052f8ed 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -404,7 +404,7 @@ def from_df(cls, df: pd.DataFrame) -> ConditionsTable: conditions = [] for condition_id, sub_df in df.reset_index().groupby(C.CONDITION_ID): - changes = [Change(**row.to_dict()) for _, row in sub_df.iterrows()] + changes = [Change(**row) for row in sub_df.to_dict("records")] conditions.append(Condition(id=condition_id, changes=changes)) return cls(conditions=conditions) From cd6cd3e1a01e7bcf998a65f8f18a8c6a5b1ecf9b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 10:25:07 +0100 Subject: [PATCH 11/19] allow extra --- petab/v2/__init__.py | 4 ++++ petab/v2/core.py | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 490c9fe2..4f8d28ea 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -24,6 +24,10 @@ get_observable_df, write_observable_df, ) +from petab.v1.parameters import ( # noqa: F401, E402 + get_parameter_df, + write_parameter_df, +) from petab.v1.yaml import load_yaml # noqa: F401, E402 # import after v1 diff --git a/petab/v2/core.py b/petab/v2/core.py index 7052f8ed..9ff24ed7 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -25,7 +25,7 @@ from ..v1.lint import is_valid_identifier from ..v1.math import sympify_petab -from . import C +from . import C, get_observable_df __all__ = [ "Observable", @@ -170,7 +170,7 @@ class Observable(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) @field_validator( @@ -244,6 +244,7 @@ def from_df(cls, df: pd.DataFrame) -> ObservablesTable: if df is None: return cls(observables=[]) + df = get_observable_df(df) observables = [ Observable(**row.to_dict()) for _, row in df.reset_index().iterrows() @@ -326,6 +327,7 @@ class Change(BaseModel): arbitrary_types_allowed=True, populate_by_name=True, use_enum_values=True, + extra="allow", ) @field_validator("target_value", mode="before") @@ -367,7 +369,7 @@ class Condition(BaseModel): changes: list[Change] #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") def __add__(self, other: Change) -> Condition: """Add a change to the set.""" @@ -403,7 +405,7 @@ def from_df(cls, df: pd.DataFrame) -> ConditionsTable: return cls(conditions=[]) conditions = [] - for condition_id, sub_df in df.reset_index().groupby(C.CONDITION_ID): + for condition_id, sub_df in df.groupby(C.CONDITION_ID): changes = [Change(**row) for row in sub_df.to_dict("records")] conditions.append(Condition(id=condition_id, changes=changes)) @@ -468,7 +470,7 @@ class ExperimentPeriod(BaseModel): condition_id: str | None = Field(alias=C.CONDITION_ID, default=None) #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") @field_validator("condition_id", mode="before") @classmethod @@ -497,7 +499,7 @@ class Experiment(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) def __add__(self, other: ExperimentPeriod) -> Experiment: @@ -614,7 +616,7 @@ class Measurement(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) @field_validator( @@ -738,7 +740,7 @@ class Mapping(BaseModel): ) #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") class MappingTable(BaseModel): @@ -831,6 +833,7 @@ class Parameter(BaseModel): arbitrary_types_allowed=True, populate_by_name=True, use_enum_values=True, + extra="allow", ) @field_validator("id") From 78741902f46f096c225d493d7b7620e32955d297 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 12:06:01 +0100 Subject: [PATCH 12/19] upconversion --- petab/v2/petab1to2.py | 51 ++++++++++++++++++------------------- tests/v2/test_conversion.py | 9 ++++--- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 93471694..d7b6fb68 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -6,6 +6,7 @@ from contextlib import suppress from itertools import chain from pathlib import Path +from tempfile import TemporaryDirectory from urllib.parse import urlparse from uuid import uuid4 @@ -27,26 +28,37 @@ def petab1to2( Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format. - Parameters - ---------- - yaml_config: dict | Path | str + :param yaml_config: The PEtab problem as dictionary or YAML file name. - output_dir: Path | str + :param output_dir: The output directory to save the converted PEtab problem, or ``None``, to return a :class:`petab.v2.Problem` instance. - Raises - ------ - ValueError + :raises ValueError: If the input is invalid or does not pass linting or if the generated files do not pass linting. """ - if output_dir is None: - # TODO requires petab.v2.Problem - raise NotImplementedError("Not implemented yet.") - elif isinstance(yaml_config, dict): - raise ValueError("If output_dir is given, yaml_config must be a file.") + if output_dir is not None: + return petab_files_1to2(yaml_config, output_dir) + with TemporaryDirectory() as tmp_dir: + petab_files_1to2(yaml_config, tmp_dir) + return v2.Problem.from_yaml(Path(tmp_dir, Path(yaml_config).name)) + + +def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): + """Convert PEtab files from PEtab 1.0 to PEtab 2.0. + + + :param yaml_config: + The PEtab problem as dictionary or YAML file name. + :param output_dir: + The output directory to save the converted PEtab problem. + + :raises ValueError: + If the input is invalid or does not pass linting or if the generated + files do not pass linting. + """ if isinstance(yaml_config, Path | str): yaml_file = str(yaml_config) path_prefix = get_path_prefix(yaml_file) @@ -64,6 +76,7 @@ def petab1to2( if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") petab_problem = v1.Problem.from_yaml(yaml_file or yaml_config) + # TODO: move to mapping table # get rid of conditionName column if present (unsupported in v2) petab_problem.condition_df = petab_problem.condition_df.drop( columns=[v1.C.CONDITION_NAME], errors="ignore" @@ -317,18 +330,4 @@ def v1v2_condition_df( ] ) - targets = set(condition_df[v2.C.TARGET_ID].unique()) - valid_cond_pars = set(model.get_valid_parameters_for_parameter_table()) - # entities to which we assign constant values - constant = targets & valid_cond_pars - # entities to which we assign initial values - initial = set() - for target in targets - constant: - if model.is_state_variable(target): - initial.add(target) - else: - raise NotImplementedError( - f"Unable to determine value type {target} in the condition " - "table." - ) return condition_df diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index 4b982fcf..cca62e3d 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -3,19 +3,20 @@ import pytest +from petab.v2 import Problem from petab.v2.petab1to2 import petab1to2 def test_petab1to2_remote(): + """Test that we can upgrade a remote PEtab 1.0.0 problem.""" yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" ) - with tempfile.TemporaryDirectory(prefix="test_petab1to2") as tmpdirname: - # TODO verify that the v2 files match "ground truth" - # in `petabtests/cases/v2.0.0/sbml/0001/_0001.yaml` - petab1to2(yaml_url, tmpdirname) + problem = petab1to2(yaml_url) + assert isinstance(problem, Problem) + assert len(problem.measurement_table.measurements) try: From 433031e24bdf24dd9adbca72c7920284a7dc9b4e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 15:31:36 +0100 Subject: [PATCH 13/19] more tests --- doc/modules.rst | 1 + petab/v1/math/sympify.py | 9 +++ petab/v2/core.py | 28 +++++++-- petab/v2/problem.py | 16 +++-- tests/v1/math/test_math.py | 5 ++ tests/v2/test_core.py | 125 ++++++++++++++++++++++++++++++++++--- 6 files changed, 165 insertions(+), 19 deletions(-) diff --git a/doc/modules.rst b/doc/modules.rst index cfc49e67..627ba9d8 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -16,6 +16,7 @@ API Reference petab.v1.core petab.v1.distributions petab.v1.lint + petab.v1.math petab.v1.measurements petab.v1.models petab.v1.observables diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py index 2f6aabf0..8ef1a129 100644 --- a/petab/v1/math/sympify.py +++ b/petab/v1/math/sympify.py @@ -15,6 +15,11 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: """Convert PEtab math expression to sympy expression. + .. note:: + + All symbols in the returned expression will have the `real=True` + assumption. + Args: expr: PEtab math expression. @@ -26,6 +31,10 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: The sympy expression corresponding to `expr`. Boolean values are converted to numeric values. """ + if isinstance(expr, sp.Expr): + # TODO: check if only PEtab-compatible symbols and functions are used + return expr + if isinstance(expr, int) or isinstance(expr, np.integer): return sp.Integer(expr) if isinstance(expr, float) or isinstance(expr, np.floating): diff --git a/petab/v2/core.py b/petab/v2/core.py index 9ff24ed7..6a716ca5 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -56,6 +56,14 @@ def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: return v +def _is_finite_or_pos_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != np.inf: + raise ValueError( + f"{info.field_name} value must be finite or inf but got {v}" + ) + return v + + def _not_nan(v: float, info: ValidationInfo) -> float: if np.isnan(v): raise ValueError(f"{info.field_name} value must not be nan.") @@ -201,10 +209,6 @@ def _sympify(cls, v): def _placeholders( self, type_: Literal["observable", "noise"] ) -> set[sp.Symbol]: - # TODO: add field validator to check for 1-based consecutive numbering - t = f"{re.escape(type_)}Parameter" - o = re.escape(self.id) - pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") formula = ( self.formula if type_ == "observable" @@ -212,7 +216,17 @@ def _placeholders( if type_ == "noise" else None ) - return {s for s in formula.free_symbols if pattern.match(str(s))} + if formula is None or formula.is_number: + return set() + + if not (free_syms := formula.free_symbols): + return set() + + # TODO: add field validator to check for 1-based consecutive numbering + t = f"{re.escape(type_)}Parameter" + o = re.escape(self.id) + pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") + return {s for s in free_syms if pattern.match(str(s))} @property def observable_placeholders(self) -> set[sp.Symbol]: @@ -600,7 +614,9 @@ class Measurement(BaseModel): #: The experiment ID. experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None) #: The time point of the measurement in time units as defined in the model. - time: float = Field(alias=C.TIME) + time: Annotated[float, AfterValidator(_is_finite_or_pos_inf)] = Field( + alias=C.TIME + ) #: The measurement value. measurement: Annotated[float, AfterValidator(_not_nan)] = Field( alias=C.MEASUREMENT diff --git a/petab/v2/problem.py b/petab/v2/problem.py index ec13ba88..6ef9d35b 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -203,10 +203,10 @@ def get_path(filename): if yaml.is_composite_problem(yaml_config): raise ValueError( - "petab.Problem.from_yaml() can only be used for " + "petab.v2.Problem.from_yaml() can only be used for " "yaml files comprising a single model. " "Consider using " - "petab.CompositeProblem.from_yaml() instead." + "petab.v2.CompositeProblem.from_yaml() instead." ) config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file @@ -350,13 +350,13 @@ def from_dfs( def from_combine(filename: Path | str) -> Problem: """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). - See also :py:func:`petab.create_combine_archive`. + See also :py:func:`petab.v2.create_combine_archive`. Arguments: filename: Path to the PEtab-COMBINE archive Returns: - A :py:class:`petab.Problem` instance. + A :py:class:`petab.v2.Problem` instance. """ # function-level import, because module-level import interfered with # other SWIG interfaces @@ -882,7 +882,7 @@ def add_condition( id_: The condition id name: The condition name kwargs: Entities to be added to the condition table in the form - `target_id=(value_type, target_value)`. + `target_id=target_value`. """ if not kwargs: raise ValueError("Cannot add condition without any changes") @@ -1132,19 +1132,25 @@ class ExtensionConfig(BaseModel): class ProblemConfig(BaseModel): """The PEtab problem configuration.""" + #: The path to the PEtab problem configuration. filepath: str | AnyUrl | None = Field( None, description="The path to the PEtab problem configuration.", exclude=True, ) + #: The base path to resolve relative paths. base_path: str | AnyUrl | None = Field( None, description="The base path to resolve relative paths.", exclude=True, ) + #: The PEtab format version. format_version: str = "2.0.0" + #: The path to the parameter file, relative to ``base_path``. parameter_file: str | AnyUrl | None = None + #: The list of problems in the configuration. problems: list[SubProblem] = [] + #: Extensiions used by the problem. extensions: list[ExtensionConfig] = [] def to_yaml(self, filename: str | Path): diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 828aac88..dae78154 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -74,6 +74,11 @@ def test_ids(): """Test symbols in expressions.""" assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True) + # test that sympy expressions that are invalid in PEtab raise an error + # TODO: handle these cases after + # https://github.com/PEtab-dev/libpetab-python/pull/364 + # sympify_petab(sp.Symbol("föö")) + def test_syntax_error(): """Test exceptions upon syntax errors.""" diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 672b733a..11700950 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -1,16 +1,12 @@ import tempfile from pathlib import Path +import pytest import sympy as sp +from pydantic import ValidationError +from sympy.abc import x, y -from petab.v2.core import ( - Change, - Condition, - ConditionsTable, - Experiment, - ExperimentPeriod, - ObservablesTable, -) +from petab.v2.core import * from petab.v2.petab1to2 import petab1to2 example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" @@ -73,3 +69,116 @@ def test_conditions_table_add_changes(): conditions_table += c2 assert conditions_table.conditions == [c1, c2] + + +def test_measurments(): + Measurement( + observable_id="obs1", time=1, experiment_id="exp1", measurement=1 + ) + Measurement( + observable_id="obs1", time="1", experiment_id="exp1", measurement="1" + ) + Measurement( + observable_id="obs1", time="inf", experiment_id="exp1", measurement="1" + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[1], + noise_parameters=[2], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[sp.sympify("x ** y")], + noise_parameters=[sp.sympify("x ** y")], + ) + + assert ( + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + non_petab=1, + ).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got -inf"): + Measurement( + observable_id="obs1", + time="-inf", + experiment_id="exp1", + measurement=1, + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="1_obs", time=1, experiment_id="exp1", measurement=1 + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="obs", time=1, experiment_id=" exp1", measurement=1 + ) + + +def test_observable(): + Observable(id="obs1", formula=x + y) + Observable(id="obs1", formula="x + y", noise_formula="x + y") + Observable(id="obs1", formula=1, noise_formula=2) + Observable( + id="obs1", + formula="x + y", + noise_formula="x + y", + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + Observable( + id="obs1", + formula=sp.sympify("x + y"), + noise_formula=sp.sympify("x + y"), + observable_parameters=[sp.Symbol("p1")], + noise_parameters=[sp.Symbol("n1")], + ) + assert Observable(id="obs1", formula="x + y", non_petab=1).non_petab == 1 + + o = Observable(id="obs1", formula=x + y) + assert o.observable_placeholders == set() + assert o.noise_placeholders == set() + + o = Observable( + id="obs1", + formula="observableParameter1_obs1", + noise_formula="noiseParameter1_obs1", + ) + assert o.observable_placeholders == { + sp.Symbol("observableParameter1_obs1", real=True), + } + assert o.noise_placeholders == { + sp.Symbol("noiseParameter1_obs1", real=True) + } + + # TODO: this should raise an error + # (numbering is not consecutive / not starting from 1) + # TODO: clarify if observableParameter0_obs1 would be allowed + # as regular parameter + # + # with pytest.raises(ValidationError): + # Observable(id="obs1", formula="observableParameter2_obs1") From ff093d0eaf9719037b916f1acaeb3276992f1ad1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 15:52:40 +0100 Subject: [PATCH 14/19] more tests --- petab/v2/core.py | 2 ++ tests/v2/test_core.py | 69 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/petab/v2/core.py b/petab/v2/core.py index 6a716ca5..88e115cb 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -842,7 +842,9 @@ class Parameter(BaseModel): ) #: Is the parameter to be estimated? estimate: bool = Field(alias=C.ESTIMATE, default=True) + # TODO priors + # pydantic vs. petab.v1.priors.Prior #: :meta private: model_config = ConfigDict( diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 11700950..5c2cda05 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -182,3 +182,72 @@ def test_observable(): # # with pytest.raises(ValidationError): # Observable(id="obs1", formula="observableParameter2_obs1") + + +def test_change(): + Change(target_id="k1", target_value=1) + Change(target_id="k1", target_value="x * y") + + assert ( + Change(target_id="k1", target_value=x * y, non_petab="foo").non_petab + == "foo" + ) + with pytest.raises(ValidationError, match="Invalid ID"): + Change(target_id="1_k", target_value=x) + + with pytest.raises(ValidationError, match="input_value=None"): + Change(target_id="k1", target_value=None) + + +def test_period(): + ExperimentPeriod(time=0) + ExperimentPeriod(time=1, condition_id="p1") + ExperimentPeriod(time="-inf", condition_id="p1") + + assert ( + ExperimentPeriod(time="1", condition_id="p1", non_petab=1).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got inf"): + ExperimentPeriod(time="inf", condition_id="p1") + + with pytest.raises(ValidationError, match="Invalid ID"): + ExperimentPeriod(time=1, condition_id="1_condition") + + with pytest.raises(ValidationError, match="type=missing"): + ExperimentPeriod(condition_id="condition") + + +def test_parameter(): + Parameter(id="k1", lb=1, ub=2) + Parameter(id="k1", estimate=False, nominal_value=1) + + assert Parameter(id="k1", lb=1, ub=2, non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Invalid ID"): + Parameter(id="1_k", lb=1, ub=2) + + with pytest.raises(ValidationError, match="upper"): + Parameter(id="k1", lb=1) + + with pytest.raises(ValidationError, match="lower"): + Parameter(id="k1", ub=1) + + with pytest.raises(ValidationError, match="less than"): + Parameter(id="k1", lb=2, ub=1) + + +def test_experiment(): + Experiment(id="experiment1") + Experiment( + id="experiment1", periods=[ExperimentPeriod(time=1, condition_id="c1")] + ) + + assert Experiment(id="experiment1", non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Field required"): + Experiment() + + with pytest.raises(ValidationError, match="Invalid ID"): + Experiment(id="experiment 1") From 87b699ea477891b95db30503ce11503c636142bc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 16:31:06 +0100 Subject: [PATCH 15/19] todo --- petab/v2/core.py | 10 +++++++++- petab/v2/lint.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 88e115cb..f5cd5636 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -837,6 +837,7 @@ class Parameter(BaseModel): #: Nominal value. nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) #: Parameter scale. + # TODO: keep or remove? scale: ParameterScale = Field( alias=C.PARAMETER_SCALE, default=ParameterScale.LIN ) @@ -902,12 +903,19 @@ def _validate(self) -> Self: "Estimated parameter must have lower and upper bounds set" ) - if self.lb is not None and self.ub is not None and self.lb >= self.ub: + # TODO: also if not estimated? + if ( + self.estimate + and self.lb is not None + and self.ub is not None + and self.lb >= self.ub + ): raise ValueError("Lower bound must be less than upper bound.") # TODO parameterScale? # TODO priorType, priorParameters + return self diff --git a/petab/v2/lint.py b/petab/v2/lint.py index e9ef6f3d..ba347c13 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -400,6 +400,9 @@ class CheckUniquePrimaryKeys(ValidationTask): """Check that all primary keys are unique.""" def run(self, problem: Problem) -> ValidationIssue | None: + # TODO: check that IDs are globally unique + # -- replaces CheckObservablesDoNotShadowModelEntities + # check for uniqueness of all primary keys counter = Counter(c.id for c in problem.conditions_table.conditions) duplicates = {id for id, count in counter.items() if count > 1} From 847d4a0a812b0f8511837d3a1a71f7022f2b9132 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 24 Mar 2025 16:46:03 +0100 Subject: [PATCH 16/19] cond. free syms --- petab/v2/conditions.py | 21 --------------------- petab/v2/core.py | 18 ++++++++++++++++++ petab/v2/lint.py | 5 ++--- tests/v2/test_core.py | 25 +++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 124f8f2b..deea1a0c 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -2,15 +2,11 @@ from __future__ import annotations -from itertools import chain from pathlib import Path import pandas as pd -import sympy as sp -from .. import v2 from ..v1.lint import assert_no_leading_trailing_whitespace -from .C import * __all__ = [ "get_condition_df", @@ -50,20 +46,3 @@ def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None: """ df = get_condition_df(df) df.to_csv(filename, sep="\t", index=False) - - -def get_condition_table_free_symbols(problem: v2.Problem) -> set[sp.Basic]: - """Free symbols from condition table assignments. - - Collects all free symbols from the condition table `targetValue` column. - - :returns: Set of free symbols. - """ - return set( - chain.from_iterable( - change.target_value.free_symbols - for condition in problem.conditions_table.conditions - for change in condition.changes - if change.target_value is not None - ) - ) diff --git a/petab/v2/core.py b/petab/v2/core.py index f5cd5636..8b16e554 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -5,6 +5,7 @@ import re from collections.abc import Sequence from enum import Enum +from itertools import chain from pathlib import Path from typing import Annotated, Literal @@ -468,6 +469,23 @@ def __iadd__(self, other: Condition) -> ConditionsTable: self.conditions.append(other) return self + @property + def free_symbols(self) -> set[sp.Symbol]: + """Get all free symbols in the conditions table. + + This includes all free symbols in the target values of the changes, + independently of whether it is referenced by any experiment, or + (indirectly) by any measurement. + """ + return set( + chain.from_iterable( + change.target_value.free_symbols + for condition in self.conditions + for change in condition.changes + if change.target_value is not None + ) + ) + class ExperimentPeriod(BaseModel): """A period of a timecourse or experiment defined by a start time diff --git a/petab/v2/lint.py b/petab/v2/lint.py index ba347c13..d18bc709 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -13,7 +13,6 @@ import pandas as pd import sympy as sp -from .. import v2 from .problem import Problem logger = logging.getLogger(__name__) @@ -743,7 +742,7 @@ def append_overrides(overrides): append_overrides(measurement.noise_parameters) # Append parameter overrides from condition table - for p in v2.conditions.get_condition_table_free_symbols(problem): + for p in problem.conditions_table.free_symbols: parameter_ids[str(p)] = None return set(parameter_ids.keys()) @@ -822,7 +821,7 @@ def append_overrides(overrides): # model parameter_ids.update( str(p) - for p in v2.conditions.get_condition_table_free_symbols(problem) + for p in problem.conditions_table.free_symbols if not problem.model.has_entity_with_id(str(p)) ) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 5c2cda05..57fec8a4 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -251,3 +251,28 @@ def test_experiment(): with pytest.raises(ValidationError, match="Invalid ID"): Experiment(id="experiment 1") + + +def test_conditions_table(): + assert ConditionsTable().free_symbols == set() + + assert ( + ConditionsTable( + conditions=[ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value="true")], + ) + ] + ).free_symbols + == set() + ) + + assert ConditionsTable( + conditions=[ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value=x / y)], + ) + ] + ).free_symbols == {x, y} From 1392112cb6ca79c7643803329e6ff0587178cc86 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 25 Mar 2025 14:27:30 +0100 Subject: [PATCH 17/19] .. --- petab/v2/core.py | 2 ++ tests/v2/test_conversion.py | 9 ++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 8b16e554..ae8063c0 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -859,6 +859,8 @@ class Parameter(BaseModel): scale: ParameterScale = Field( alias=C.PARAMETER_SCALE, default=ParameterScale.LIN ) + # TODO: change to bool in PEtab, or serialize as 0/1? + # https://github.com/PEtab-dev/PEtab/discussions/610 #: Is the parameter to be estimated? estimate: bool = Field(alias=C.ESTIMATE, default=True) diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index cca62e3d..612606ab 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -1,5 +1,4 @@ import logging -import tempfile import pytest @@ -37,10 +36,10 @@ def test_benchmark_collection(problem_id): logging.basicConfig(level=logging.DEBUG) if problem_id == "Froehlich_CellSystems2018": + # this is mostly about 6M sympifications in the condition table pytest.skip("Too slow. Re-enable once we are faster.") yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) - with tempfile.TemporaryDirectory( - prefix=f"test_petab1to2_{problem_id}" - ) as tmpdirname: - petab1to2(yaml_path, tmpdirname) + problem = petab1to2(yaml_path) + assert isinstance(problem, Problem) + assert len(problem.measurement_table.measurements) From 958d1917a93101b93c45e9eed51860e873658e8b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 11:50:46 +0100 Subject: [PATCH 18/19] more coherent naming --- petab/v2/C.py | 16 ++++---- petab/v2/core.py | 44 ++++++++++----------- petab/v2/lint.py | 58 +++++++++++++-------------- petab/v2/problem.py | 92 +++++++++++++++++++++---------------------- tests/v2/test_core.py | 32 +++++++-------- 5 files changed, 119 insertions(+), 123 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index 545124a4..c94a1d29 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -147,25 +147,25 @@ # OBSERVABLES -#: Observable name column in the observables table +#: Observable name column in the observable table OBSERVABLE_NAME = "observableName" -#: Observable formula column in the observables table +#: Observable formula column in the observable table OBSERVABLE_FORMULA = "observableFormula" -#: Noise formula column in the observables table +#: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" -#: Observable transformation column in the observables table +#: Observable transformation column in the observable table OBSERVABLE_TRANSFORMATION = "observableTransformation" -#: Noise distribution column in the observables table +#: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" -#: Mandatory columns of observables table +#: Mandatory columns of observable table OBSERVABLE_DF_REQUIRED_COLS = [ OBSERVABLE_ID, OBSERVABLE_FORMULA, NOISE_FORMULA, ] -#: Optional columns of observables table +#: Optional columns of observable table OBSERVABLE_DF_OPTIONAL_COLS = [ OBSERVABLE_NAME, OBSERVABLE_TRANSFORMATION, @@ -378,7 +378,7 @@ #: Simulated value column in the simulation table SIMULATION = "simulation" -#: Residual value column in the residuals table +#: Residual value column in the residual table RESIDUAL = "residual" #: ??? NOISE_VALUE = "noiseValue" diff --git a/petab/v2/core.py b/petab/v2/core.py index ae8063c0..7f2c6c9f 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -30,15 +30,15 @@ __all__ = [ "Observable", - "ObservablesTable", + "ObservableTable", "ObservableTransformation", "NoiseDistribution", "Change", "Condition", - "ConditionsTable", + "ConditionTable", "ExperimentPeriod", "Experiment", - "ExperimentsTable", + "ExperimentTable", "Measurement", "MeasurementTable", "Mapping", @@ -240,7 +240,7 @@ def noise_placeholders(self) -> set[sp.Symbol]: return self._placeholders("noise") -class ObservablesTable(BaseModel): +class ObservableTable(BaseModel): """PEtab observables table.""" #: List of observables. @@ -254,7 +254,7 @@ def __getitem__(self, observable_id: str) -> Observable: raise KeyError(f"Observable ID {observable_id} not found") @classmethod - def from_df(cls, df: pd.DataFrame) -> ObservablesTable: + def from_df(cls, df: pd.DataFrame) -> ObservableTable: """Create an ObservablesTable from a DataFrame.""" if df is None: return cls(observables=[]) @@ -293,7 +293,7 @@ def to_df(self) -> pd.DataFrame: return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) @classmethod - def from_tsv(cls, file_path: str | Path) -> ObservablesTable: + def from_tsv(cls, file_path: str | Path) -> ObservableTable: """Create an ObservablesTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -303,13 +303,13 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=True) - def __add__(self, other: Observable) -> ObservablesTable: + def __add__(self, other: Observable) -> ObservableTable: """Add an observable to the table.""" if not isinstance(other, Observable): raise TypeError("Can only add Observable to ObservablesTable") - return ObservablesTable(observables=self.observables + [other]) + return ObservableTable(observables=self.observables + [other]) - def __iadd__(self, other: Observable) -> ObservablesTable: + def __iadd__(self, other: Observable) -> ObservableTable: """Add an observable to the table in place.""" if not isinstance(other, Observable): raise TypeError("Can only add Observable to ObservablesTable") @@ -321,7 +321,7 @@ class Change(BaseModel): """A change to the model or model state. A change to the model or model state, corresponding to an individual - row of the PEtab conditions table. + row of the PEtab condition table. >>> Change( ... target_id="k1", @@ -400,7 +400,7 @@ def __iadd__(self, other: Change) -> Condition: return self -class ConditionsTable(BaseModel): +class ConditionTable(BaseModel): """PEtab conditions table.""" #: List of conditions. @@ -414,7 +414,7 @@ def __getitem__(self, condition_id: str) -> Condition: raise KeyError(f"Condition ID {condition_id} not found") @classmethod - def from_df(cls, df: pd.DataFrame) -> ConditionsTable: + def from_df(cls, df: pd.DataFrame) -> ConditionTable: """Create a ConditionsTable from a DataFrame.""" if df is None or df.empty: return cls(conditions=[]) @@ -446,7 +446,7 @@ def to_df(self) -> pd.DataFrame: ) @classmethod - def from_tsv(cls, file_path: str | Path) -> ConditionsTable: + def from_tsv(cls, file_path: str | Path) -> ConditionTable: """Create a ConditionsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -456,13 +456,13 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=False) - def __add__(self, other: Condition) -> ConditionsTable: + def __add__(self, other: Condition) -> ConditionTable: """Add a condition to the table.""" if not isinstance(other, Condition): raise TypeError("Can only add Conditions to ConditionsTable") - return ConditionsTable(conditions=self.conditions + [other]) + return ConditionTable(conditions=self.conditions + [other]) - def __iadd__(self, other: Condition) -> ConditionsTable: + def __iadd__(self, other: Condition) -> ConditionTable: """Add a condition to the table in place.""" if not isinstance(other, Condition): raise TypeError("Can only add Conditions to ConditionsTable") @@ -548,14 +548,14 @@ def __iadd__(self, other: ExperimentPeriod) -> Experiment: return self -class ExperimentsTable(BaseModel): +class ExperimentTable(BaseModel): """PEtab experiments table.""" #: List of experiments. experiments: list[Experiment] @classmethod - def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: + def from_df(cls, df: pd.DataFrame) -> ExperimentTable: """Create an ExperimentsTable from a DataFrame.""" if df is None: return cls(experiments=[]) @@ -589,7 +589,7 @@ def to_df(self) -> pd.DataFrame: ) @classmethod - def from_tsv(cls, file_path: str | Path) -> ExperimentsTable: + def from_tsv(cls, file_path: str | Path) -> ExperimentTable: """Create an ExperimentsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -599,13 +599,13 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=False) - def __add__(self, other: Experiment) -> ExperimentsTable: + def __add__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table.""" if not isinstance(other, Experiment): raise TypeError("Can only add Experiment to ExperimentsTable") - return ExperimentsTable(experiments=self.experiments + [other]) + return ExperimentTable(experiments=self.experiments + [other]) - def __iadd__(self, other: Experiment) -> ExperimentsTable: + def __iadd__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table in place.""" if not isinstance(other, Experiment): raise TypeError("Can only add Experiment to ExperimentsTable") diff --git a/petab/v2/lint.py b/petab/v2/lint.py index d18bc709..71d655dd 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -243,12 +243,12 @@ def run(self, problem: Problem) -> ValidationIssue | None: m.observable_id for m in problem.measurement_table.measurements } defined_observables = { - o.id for o in problem.observables_table.observables + o.id for o in problem.observable_table.observables } if undefined_observables := (used_observables - defined_observables): return ValidationError( f"Observables {undefined_observables} used in " - "measurement table but not defined in observables table." + "measurement table but not defined in observable table." ) @@ -259,11 +259,11 @@ class CheckOverridesMatchPlaceholders(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: observable_parameters_count = { o.id: len(o.observable_placeholders) - for o in problem.observables_table.observables + for o in problem.observable_table.observables } noise_parameters_count = { o.id: len(o.noise_placeholders) - for o in problem.observables_table.observables + for o in problem.observable_table.observables } messages = [] for m in problem.measurement_table.measurements: @@ -280,7 +280,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: actual = len(m.observable_parameters) if actual != expected: - formula = problem.observables_table[m.observable_id].formula + formula = problem.observable_table[m.observable_id].formula messages.append( f"Mismatch of observable parameter overrides for " f"{m.observable_id} ({formula})" @@ -306,7 +306,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: "noiseParameters column." ) else: - formula = problem.observables_table[ + formula = problem.observable_table[ m.observable_id ].noise_formula messages.append( @@ -329,7 +329,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: log_observables = { o.id - for o in problem.observables_table.observables + for o in problem.observable_table.observables if o.transformation in [ot.LOG, ot.LOG10] } if log_observables: @@ -359,7 +359,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # check that measured experiments exist available_experiments = { - e.id for e in problem.experiments_table.experiments + e.id for e in problem.experiment_table.experiments } if missing_experiments := (used_experiments - available_experiments): return ValidationError( @@ -385,7 +385,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: used_targets = { change.target_id - for cond in problem.conditions_table.conditions + for cond in problem.condition_table.conditions for change in cond.changes } @@ -403,7 +403,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # -- replaces CheckObservablesDoNotShadowModelEntities # check for uniqueness of all primary keys - counter = Counter(c.id for c in problem.conditions_table.conditions) + counter = Counter(c.id for c in problem.condition_table.conditions) duplicates = {id for id, count in counter.items() if count > 1} if duplicates: @@ -411,7 +411,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Condition table contains duplicate IDs: {duplicates}" ) - counter = Counter(o.id for o in problem.observables_table.observables) + counter = Counter(o.id for o in problem.observable_table.observables) duplicates = {id for id, count in counter.items() if count > 1} if duplicates: @@ -419,7 +419,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Observable table contains duplicate IDs: {duplicates}" ) - counter = Counter(e.id for e in problem.experiments_table.experiments) + counter = Counter(e.id for e in problem.experiment_table.experiments) duplicates = {id for id, count in counter.items() if count > 1} if duplicates: @@ -441,12 +441,12 @@ class CheckObservablesDoNotShadowModelEntities(ValidationTask): # TODO: all PEtab entity IDs must be disjoint from the model entity IDs def run(self, problem: Problem) -> ValidationIssue | None: - if not problem.observables_table.observables or problem.model is None: + if not problem.observable_table.observables or problem.model is None: return None shadowed_entities = [ o.id - for o in problem.observables_table.observables + for o in problem.observable_table.observables if problem.model.has_entity_with_id(o.id) ] if shadowed_entities: @@ -460,7 +460,7 @@ class CheckExperimentTable(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: messages = [] - for experiment in problem.experiments_table.experiments: + for experiment in problem.experiment_table.experiments: # Check that there are no duplicate timepoints counter = Counter(period.time for period in experiment.periods) duplicates = {time for time, count in counter.items() if count > 1} @@ -481,9 +481,9 @@ class CheckExperimentConditionsExist(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: messages = [] available_conditions = { - c.id for c in problem.conditions_table.conditions + c.id for c in problem.condition_table.conditions } - for experiment in problem.experiments_table.experiments: + for experiment in problem.experiment_table.experiments: missing_conditions = { period.condition_id for period in experiment.periods @@ -575,7 +575,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: entities_in_condition_table = { change.target_id - for cond in problem.conditions_table.conditions + for cond in problem.condition_table.conditions for change in cond.changes } entities_in_parameter_table = { @@ -630,7 +630,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: if m.experiment_id is not None } available_experiments = { - e.id for e in problem.experiments_table.experiments + e.id for e in problem.experiment_table.experiments } unused_experiments = available_experiments - used_experiments @@ -648,12 +648,12 @@ class CheckUnusedConditions(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: used_conditions = { p.condition_id - for e in problem.experiments_table.experiments + for e in problem.experiment_table.experiments for p in e.periods if p.condition_id is not None } available_conditions = { - c.id for c in problem.conditions_table.conditions + c.id for c in problem.condition_table.conditions } unused_conditions = available_conditions - used_conditions @@ -707,7 +707,7 @@ def get_valid_parameters_for_parameter_table( # condition table targets invalid |= { change.target_id - for cond in problem.conditions_table.conditions + for cond in problem.condition_table.conditions for change in cond.changes } @@ -742,7 +742,7 @@ def append_overrides(overrides): append_overrides(measurement.noise_parameters) # Append parameter overrides from condition table - for p in problem.conditions_table.free_symbols: + for p in problem.condition_table.free_symbols: parameter_ids[str(p)] = None return set(parameter_ids.keys()) @@ -765,7 +765,7 @@ def get_required_parameters_for_parameter_table( parameter_ids = set() condition_targets = { change.target_id - for cond in problem.conditions_table.conditions + for cond in problem.condition_table.conditions for change in cond.changes } @@ -786,7 +786,7 @@ def append_overrides(overrides): # TODO remove `observable_ids` when # `get_output_parameters` is updated for PEtab v2/v1.1, where # observable IDs are allowed in observable formulae - observable_ids = {o.id for o in problem.observables_table.observables} + observable_ids = {o.id for o in problem.observable_table.observables} # Add output parameters except for placeholders for formula_type, placeholder_sources in ( @@ -821,7 +821,7 @@ def append_overrides(overrides): # model parameter_ids.update( str(p) - for p in problem.conditions_table.free_symbols + for p in problem.condition_table.free_symbols if not problem.model.has_entity_with_id(str(p)) ) @@ -852,11 +852,11 @@ def get_output_parameters( formulas = [] if observables: formulas.extend( - o.formula for o in problem.observables_table.observables + o.formula for o in problem.observable_table.observables ) if noise: formulas.extend( - o.noise_formula for o in problem.observables_table.observables + o.noise_formula for o in problem.observable_table.observables ) output_parameters = OrderedDict() @@ -906,7 +906,7 @@ def get_placeholders( # collect placeholder parameters overwritten by # {observable,noise}Parameters placeholders = [] - for o in problem.observables_table.observables: + for o in problem.observable_table.observables: if observables: placeholders.extend(map(str, o.observable_placeholders)) if noise: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 6ef9d35b..fede0762 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -50,7 +50,7 @@ class Problem: - experiment table - measurement table - parameter table - - observables table + - observable table - mapping table Optionally, it may contain visualization tables. @@ -61,11 +61,11 @@ class Problem: def __init__( self, model: Model = None, - conditions_table: core.ConditionsTable = None, - experiments_table: core.ExperimentsTable = None, - observables_table: core.ObservablesTable = None, + condition_table: core.ConditionTable = None, + experiment_table: core.ExperimentTable = None, + observable_table: core.ObservableTable = None, measurement_table: core.MeasurementTable = None, - parameters_table: core.ParameterTable = None, + parameter_table: core.ParameterTable = None, mapping_table: core.MappingTable = None, visualization_df: pd.DataFrame = None, config: ProblemConfig = None, @@ -78,20 +78,20 @@ def __init__( default_validation_tasks.copy() ) - self.observables_table = observables_table or core.ObservablesTable( + self.observable_table = observable_table or core.ObservableTable( observables=[] ) - self.conditions_table = conditions_table or core.ConditionsTable( + self.condition_table = condition_table or core.ConditionTable( conditions=[] ) - self.experiments_table = experiments_table or core.ExperimentsTable( + self.experiment_table = experiment_table or core.ExperimentTable( experiments=[] ) self.measurement_table = measurement_table or core.MeasurementTable( measurements=[] ) self.mapping_table = mapping_table or core.MappingTable(mappings=[]) - self.parameter_table = parameters_table or core.ParameterTable( + self.parameter_table = parameter_table or core.ParameterTable( parameters=[] ) @@ -100,13 +100,13 @@ def __init__( def __str__(self): model = f"with model ({self.model})" if self.model else "without model" - ne = len(self.experiments_table.experiments) + ne = len(self.experiment_table.experiments) experiments = f"{ne} experiments" - nc = len(self.conditions_table.conditions) + nc = len(self.condition_table.conditions) conditions = f"{nc} conditions" - no = len(self.observables_table.observables) + no = len(self.observable_table.observables) observables = f"{no} observables" nm = len(self.measurement_table.measurements) @@ -129,9 +129,9 @@ def __getitem__(self, key): Accessing model entities is not currently not supported. """ for table in ( - self.conditions_table, - self.experiments_table, - self.observables_table, + self.condition_table, + self.experiment_table, + self.observable_table, self.measurement_table, self.parameter_table, self.mapping_table, @@ -327,20 +327,20 @@ def from_dfs( config: The PEtab problem configuration """ - observables_table = core.ObservablesTable.from_df(observable_df) - conditions_table = core.ConditionsTable.from_df(condition_df) - experiments_table = core.ExperimentsTable.from_df(experiment_df) + observable_table = core.ObservableTable.from_df(observable_df) + condition_table = core.ConditionTable.from_df(condition_df) + experiment_table = core.ExperimentTable.from_df(experiment_df) measurement_table = core.MeasurementTable.from_df(measurement_df) mapping_table = core.MappingTable.from_df(mapping_df) parameter_table = core.ParameterTable.from_df(parameter_df) return Problem( model=model, - conditions_table=conditions_table, - experiments_table=experiments_table, - observables_table=observables_table, + condition_table=condition_table, + experiment_table=experiment_table, + observable_table=observable_table, measurement_table=measurement_table, - parameters_table=parameter_table, + parameter_table=parameter_table, mapping_table=mapping_table, visualization_df=visualization_df, config=config, @@ -404,28 +404,26 @@ def get_problem(problem: str | Path | Problem) -> Problem: @property def condition_df(self) -> pd.DataFrame | None: - """Conditions table as DataFrame.""" + """Condition table as DataFrame.""" # TODO: return empty df? - return self.conditions_table.to_df() if self.conditions_table else None + return self.condition_table.to_df() if self.condition_table else None @condition_df.setter def condition_df(self, value: pd.DataFrame): - self.conditions_table = core.ConditionsTable.from_df(value) + self.condition_table = core.ConditionTable.from_df(value) @property def experiment_df(self) -> pd.DataFrame | None: - """Experiments table as DataFrame.""" - return ( - self.experiments_table.to_df() if self.experiments_table else None - ) + """Experiment table as DataFrame.""" + return self.experiment_table.to_df() if self.experiment_table else None @experiment_df.setter def experiment_df(self, value: pd.DataFrame): - self.experiments_table = core.ExperimentsTable.from_df(value) + self.experiment_table = core.ExperimentTable.from_df(value) @property def measurement_df(self) -> pd.DataFrame | None: - """Measurements table as DataFrame.""" + """Measurement table as DataFrame.""" return ( self.measurement_table.to_df() if self.measurement_table else None ) @@ -445,14 +443,12 @@ def parameter_df(self, value: pd.DataFrame): @property def observable_df(self) -> pd.DataFrame | None: - """Observables table as DataFrame.""" - return ( - self.observables_table.to_df() if self.observables_table else None - ) + """Observable table as DataFrame.""" + return self.observable_table.to_df() if self.observable_table else None @observable_df.setter def observable_df(self, value: pd.DataFrame): - self.observables_table = core.ObservablesTable.from_df(value) + self.observable_table = core.ObservableTable.from_df(value) @property def mapping_df(self) -> pd.DataFrame | None: @@ -465,14 +461,14 @@ def mapping_df(self, value: pd.DataFrame): def get_optimization_parameters(self) -> list[str]: """ - Get list of optimization parameter IDs from parameter table. + Get the list of optimization parameter IDs from parameter table. Arguments: parameter_df: PEtab parameter DataFrame Returns: - List of IDs of parameters selected for optimization - (i.e. those with estimate = True). + A list of IDs of parameters selected for optimization + (i.e., those with estimate = True). """ return [p.id for p in self.parameter_table.parameters if p.estimate] @@ -489,7 +485,7 @@ def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. """ - return [o.id for o in self.observables_table.observables] + return [o.id for o in self.observable_table.observables] def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): """Apply mask of only free or only fixed values. @@ -499,9 +495,9 @@ def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): v: The full vector the mask is to be applied to. free: - Whether to return free parameters, i.e. parameters to estimate. + Whether to return free parameters, i.e., parameters to estimate. fixed: - Whether to return fixed parameters, i.e. parameters not to + Whether to return fixed parameters, i.e., parameters not to estimate. Returns @@ -891,7 +887,7 @@ def add_condition( core.Change(target_id=target_id, target_value=target_value) for target_id, target_value in kwargs.items() ] - self.conditions_table.conditions.append( + self.condition_table.conditions.append( core.Condition(id=id_, changes=changes) ) if name is not None: @@ -938,7 +934,7 @@ def add_observable( record[OBSERVABLE_TRANSFORMATION] = transform record.update(kwargs) - self.observables_table += core.Observable(**record) + self.observable_table += core.Observable(**record) def add_parameter( self, @@ -1070,7 +1066,7 @@ def add_experiment(self, id_: str, *args): for i in range(0, len(args), 2) ] - self.experiments_table.experiments.append( + self.experiment_table.experiments.append( core.Experiment(id=id_, periods=periods) ) @@ -1085,15 +1081,15 @@ def __iadd__(self, other): ) if isinstance(other, Observable): - self.observables_table += other + self.observable_table += other elif isinstance(other, Parameter): self.parameter_table += other elif isinstance(other, Measurement): self.measurement_table += other elif isinstance(other, Condition): - self.conditions_table += other + self.condition_table += other elif isinstance(other, Experiment): - self.experiments_table += other + self.experiment_table += other else: raise ValueError( f"Cannot add object of type {type(other)} to Problem." diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 57fec8a4..181f5523 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -12,25 +12,25 @@ example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" -def test_observables_table_round_trip(): +def test_observable_table_round_trip(): file = example_dir_fujita / "Fujita_observables.tsv" - observables = ObservablesTable.from_tsv(file) + observables = ObservableTable.from_tsv(file) with tempfile.TemporaryDirectory() as tmp_dir: tmp_file = Path(tmp_dir) / "observables.tsv" observables.to_tsv(tmp_file) - observables2 = ObservablesTable.from_tsv(tmp_file) + observables2 = ObservableTable.from_tsv(tmp_file) assert observables == observables2 -def test_conditions_table_round_trip(): +def test_condition_table_round_trip(): with tempfile.TemporaryDirectory() as tmp_dir: petab1to2(example_dir_fujita / "Fujita.yaml", tmp_dir) file = Path(tmp_dir, "Fujita_experimentalCondition.tsv") - conditions = ConditionsTable.from_tsv(file) + conditions = ConditionTable.from_tsv(file) tmp_file = Path(tmp_dir) / "conditions.tsv" conditions.to_tsv(tmp_file) - conditions2 = ConditionsTable.from_tsv(tmp_file) + conditions2 = ConditionTable.from_tsv(tmp_file) assert conditions == conditions2 @@ -52,9 +52,9 @@ def test_experiment_add_periods(): assert exp.periods == [p1, p2] -def test_conditions_table_add_changes(): - conditions_table = ConditionsTable() - assert conditions_table.conditions == [] +def test_condition_table_add_changes(): + condition_table = ConditionTable() + assert condition_table.conditions == [] c1 = Condition( id="condition1", @@ -65,10 +65,10 @@ def test_conditions_table_add_changes(): changes=[Change(target_id="k2", target_value=sp.sympify("2 * x"))], ) - conditions_table += c1 - conditions_table += c2 + condition_table += c1 + condition_table += c2 - assert conditions_table.conditions == [c1, c2] + assert condition_table.conditions == [c1, c2] def test_measurments(): @@ -253,11 +253,11 @@ def test_experiment(): Experiment(id="experiment 1") -def test_conditions_table(): - assert ConditionsTable().free_symbols == set() +def test_condition_table(): + assert ConditionTable().free_symbols == set() assert ( - ConditionsTable( + ConditionTable( conditions=[ Condition( id="condition1", @@ -268,7 +268,7 @@ def test_conditions_table(): == set() ) - assert ConditionsTable( + assert ConditionTable( conditions=[ Condition( id="condition1", From 055dc2cee2b3013638ed6dea137b9a6b824ddb5f Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 13:44:55 +0100 Subject: [PATCH 19/19] estimate -> bool --- petab/v2/core.py | 20 +++++++++++--------- petab/v2/problem.py | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 7f2c6c9f..10088b62 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -890,19 +890,21 @@ def _validate_estimate_before(cls, v): if isinstance(v, bool): return v - if isinstance(v, int) or isinstance(v, float) and v.is_integer(): - if v == 0: - return False - if v == 1: - return True + # FIXME: grace period for 0/1 values until the test suite was updated + if v in [0, 1, "0", "1"]: + return bool(int(v)) + # TODO: clarify whether extra whitespace is allowed if isinstance(v, str): - if v == "0": - return False - if v == "1": + v = v.strip().lower() + if v == "true": return True + if v == "false": + return False - raise ValueError(f"Invalid value for estimate: {v}. Must be 0 or 1.") + raise ValueError( + f"Invalid value for estimate: {v}. Must be `true` or `false`." + ) @field_validator("lb", "ub", "nominal_value") @classmethod diff --git a/petab/v2/problem.py b/petab/v2/problem.py index fede0762..2b564e0c 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -970,7 +970,7 @@ def add_parameter( PARAMETER_ID: id_, } if estimate is not None: - record[ESTIMATE] = int(estimate) + record[ESTIMATE] = estimate if nominal_value is not None: record[NOMINAL_VALUE] = nominal_value if scale is not None: