diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index f1a750a..695b011 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -12,21 +12,24 @@ jobs: max-parallel: 5 steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.12 - uses: actions/setup-python@v3 - with: - python-version: 3.12 - - name: Install dependencies - run: | - pip install poetry - poetry install --with=dev - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - poetry run pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml + - uses: actions/checkout@v3 + - name: Set up Python 3.12 + uses: actions/setup-python@v3 + with: + python-version: 3.12 + - name: Install dependencies + run: | + pip install poetry + poetry install --with=dev + - name: Lint with ruff + run: | + poetry run ruff check . --fix --exit-non-zero-on-fix + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + poetry run flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + poetry run pytest --cov --cov-report term --cov-report xml --junitxml=xunit-result.xml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7b7b66d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +exclude: '.*\.(csv|msp)$' +default_stages: [pre-commit] + +default_language_version: + python: python3.12 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-json + - id: check-toml + - id: check-xml + - id: check-yaml + - id: debug-statements + - id: check-builtin-literals + - id: check-case-conflict + - id: check-docstring-first + - id: detect-private-key + + # Ruff linter (primary linter); keep rev in sync with the ruff pin in pyproject.toml + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.13 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + + # Flake8 (optional, for CI or legacy); keep rev in sync with the flake8 pin in pyproject.toml + - repo: https://github.com/pycqa/flake8 + rev: 7.2.0 + hooks: + - id: flake8 + additional_dependencies: [] + args: [ + "--count", + "--select=E9,F63,F7,F82", + "--show-source", + "--statistics" + ] + - id: flake8 + name: flake8 (warnings) + additional_dependencies: [] + args: [ + "--count", + "--exit-zero", + "--max-complexity=10", + "--max-line-length=127", + "--statistics" + ] diff --git a/CHANGELOG.md b/CHANGELOG.md index 541a6de..de9d245 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,106 +1,175 @@ # Changelog + All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.0] - 2025-06-09 + +### Added + +- Added `smiles_to_formula` and `inchi_to_formula` conversions to the RDKit converter. +- Added unit tests for `smiles_to_formula` and `inchi_to_formula`.
+- Added support for tabular files, which are treated like tsv files. + +### Changed + +- Updated `matchms` dependency to `>=0.30.0` in `pyproject.toml`. + +### Fixed + +- Minor documentation and test improvements. ## [0.4.1] - 2025-05-19 + ### Changed -* updated dependencies [#157](https://github.com/RECETOX/MSMetaEnhancer/pull/157) + +- updated dependencies [#157](https://github.com/RECETOX/MSMetaEnhancer/pull/157) ## [0.4.0] - 2024-03-13 + ### Changed -* Update IDSM SPARQL queries to achieve better performance by @galgonek in [#152](https://github.com/RECETOX/MSMetaEnhancer/pull/152) -* Switched to poetry and added additional converters by @hechth in [#155](https://github.com/RECETOX/MSMetaEnhancer/pull/155) +- Update IDSM SPARQL queries to achieve better performance by @galgonek in [#152](https://github.com/RECETOX/MSMetaEnhancer/pull/152) +- Switched to poetry and added additional converters by @hechth in [#155](https://github.com/RECETOX/MSMetaEnhancer/pull/155) ## [0.3.0] - 2023-05-12 + ### Added -* general class Data for input handling [#141](https://github.com/RECETOX/MSMetaEnhancer/pull/141) -* DataFrame class to read and handle tabular metadata input [#141](https://github.com/RECETOX/MSMetaEnhancer/pull/141) -* implementation of blocking time in PubChem [#145](https://github.com/RECETOX/MSMetaEnhancer/pull/145) + +- general class Data for input handling [#141](https://github.com/RECETOX/MSMetaEnhancer/pull/141) +- DataFrame class to read and handle tabular metadata input [#141](https://github.com/RECETOX/MSMetaEnhancer/pull/141) +- implementation of blocking time in PubChem [#145](https://github.com/RECETOX/MSMetaEnhancer/pull/145) + ### Changed -* Spectra class is an instantiation of Data class [#141](https://github.com/RECETOX/MSMetaEnhancer/pull/141) -* fix throttling freezing the app [#144](https://github.com/RECETOX/MSMetaEnhancer/pull/144) + +- Spectra class is an instantiation of Data class [#141](https://github.com/RECETOX/MSMetaEnhancer/pull/141)
+- fix throttling freezing the app [#144](https://github.com/RECETOX/MSMetaEnhancer/pull/144) + ### Removed -* retired NLM (ChemIDplus) service [#140](https://github.com/RECETOX/MSMetaEnhancer/pull/140) + +- retired NLM (ChemIDplus) service [#140](https://github.com/RECETOX/MSMetaEnhancer/pull/140) ## [0.2.5] - 2022-10-15 + ### Added -* added Pytest config file `pytest.ini` and set it to automatically detect asynchronous tests [#124](https://github.com/RECETOX/MSMetaEnhancer/pull/124) + +- added Pytest config file `pytest.ini` and set it to automatically detect asynchronous tests [#124](https://github.com/RECETOX/MSMetaEnhancer/pull/124) + ### Changed -* fixed Circuit Breaker implementation to be compatible with Python 3.9 [#124](https://github.com/RECETOX/MSMetaEnhancer/pull/124) + +- fixed Circuit Breaker implementation to be compatible with Python 3.9 [#124](https://github.com/RECETOX/MSMetaEnhancer/pull/124) + ### Removed ## [0.2.4] - 2022-08-30 + ### Changed -* escaping of single quotes in IDSM arguments [#102](https://github.com/RECETOX/MSMetaEnhancer/issues/102) -* unified environment and packaging management [#115](https://github.com/RECETOX/MSMetaEnhancer/issues/115) -* apply circuit breaker pattern in WebConverter [#113](https://github.com/RECETOX/MSMetaEnhancer/issues/113) + +- escaping of single quotes in IDSM arguments [#102](https://github.com/RECETOX/MSMetaEnhancer/issues/102) +- unified environment and packaging management [#115](https://github.com/RECETOX/MSMetaEnhancer/issues/115) +- apply circuit breaker pattern in WebConverter [#113](https://github.com/RECETOX/MSMetaEnhancer/issues/113) + ### Removed -* removed test case from curator which fails in matchms > 0.14 [#112](https://github.com/RECETOX/MSMetaEnhancer/issues/112) + +- removed test case from curator which fails in matchms > 0.14 [#112](https://github.com/RECETOX/MSMetaEnhancer/issues/112) ## [0.2.3] - 2022-05-12 + ### Added -* KEGG ID conversions support to BridgeDb service 
[#101](https://github.com/RECETOX/MSMetaEnhancer/issues/101) + +- KEGG ID conversions support to BridgeDb service [#101](https://github.com/RECETOX/MSMetaEnhancer/issues/101) + ### Changed -* double quotes to single quotes in IDSM [#102](https://github.com/RECETOX/MSMetaEnhancer/issues/102) + +- double quotes to single quotes in IDSM [#102](https://github.com/RECETOX/MSMetaEnhancer/issues/102) ## [0.2.2] - 2022-04-27 + ### Added -* introduced `error` level into logging [#95](https://github.com/RECETOX/MSMetaEnhancer/issues/95) -* logging of unknown errors in Annotator [#90](https://github.com/RECETOX/MSMetaEnhancer/issues/90) + +- introduced `error` level into logging [#95](https://github.com/RECETOX/MSMetaEnhancer/issues/95) +- logging of unknown errors in Annotator [#90](https://github.com/RECETOX/MSMetaEnhancer/issues/90) + ### Changed -* the log file is now written continuously during annotation and the metrics added at the end of the file [#92](https://github.com/RECETOX/MSMetaEnhancer/issues/92) + +- the log file is now written continuously during annotation and the metrics added at the end of the file [#92](https://github.com/RECETOX/MSMetaEnhancer/issues/92) + ### Removed ## [0.2.1] - 2022-04-05 + ### Added -* try-finally block to ensure the Monitor thread is always terminated [#86](https://github.com/RECETOX/MSMetaEnhancer/issues/86) + +- try-finally block to ensure the Monitor thread is always terminated [#86](https://github.com/RECETOX/MSMetaEnhancer/issues/86) + ### Changed -* improved parsing of PubChem responses [#84](https://github.com/RECETOX/MSMetaEnhancer/issues/84) + +- improved parsing of PubChem responses [#84](https://github.com/RECETOX/MSMetaEnhancer/issues/84) ## [0.2.0] - 2022-03-19 + ### Added -* BridgeDb supporting conversion of several database IDs [#76](https://github.com/RECETOX/MSMetaEnhancer/issues/76) -* ComputeConverter class for conversions based on computation instead of querying 
[#75](https://github.com/RECETOX/MSMetaEnhancer/issues/75) -* ConverterBuilder which validates and initialises converters [#75](https://github.com/RECETOX/MSMetaEnhancer/issues/75) -* reintroduced PubChem service using direct REST web interface [#76](https://github.com/RECETOX/MSMetaEnhancer/issues/76) + +- BridgeDb supporting conversion of several database IDs [#76](https://github.com/RECETOX/MSMetaEnhancer/issues/76) +- ComputeConverter class for conversions based on computation instead of querying [#75](https://github.com/RECETOX/MSMetaEnhancer/issues/75) +- ConverterBuilder which validates and initialises converters [#75](https://github.com/RECETOX/MSMetaEnhancer/issues/75) +- reintroduced PubChem service using direct REST web interface [#76](https://github.com/RECETOX/MSMetaEnhancer/issues/76) + ### Changed -* reorganised Converter class to support computation approach [#75](https://github.com/RECETOX/MSMetaEnhancer/issues/75) -* renamed PubChem service to IDSM to avoid confusion [#73](https://github.com/RECETOX/MSMetaEnhancer/issues/73) + +- reorganised Converter class to support computation approach [#75](https://github.com/RECETOX/MSMetaEnhancer/issues/75) +- renamed PubChem service to IDSM to avoid confusion [#73](https://github.com/RECETOX/MSMetaEnhancer/issues/73) ## [0.1.3] - 2022-02-15 + ### Added -* multidict package requirement -* tracking of attributes validation in log [#68](https://github.com/RECETOX/MSMetaEnhancer/issues/68) -* CIR: Inchi -> SMILES conversion [#66](https://github.com/RECETOX/MSMetaEnhancer/issues/66) + +- multidict package requirement +- tracking of attributes validation in log [#68](https://github.com/RECETOX/MSMetaEnhancer/issues/68) +- CIR: Inchi -> SMILES conversion [#66](https://github.com/RECETOX/MSMetaEnhancer/issues/66) + ### Changed -* passed `multidict` instead of `frozendict` to `aiohttp.ClientSession.post` (required by package) -* take only first result when there are multiple hits in CIR conversions 
[#69](https://github.com/RECETOX/MSMetaEnhancer/issues/69) -* support `ISOMERIC_SMILES` and `CANONICAL_SMILES` in PubChem instead of generic `SMILES` [#67](https://github.com/RECETOX/MSMetaEnhancer/issues/67) + +- passed `multidict` instead of `frozendict` to `aiohttp.ClientSession.post` (required by package) +- take only first result when there are multiple hits in CIR conversions [#69](https://github.com/RECETOX/MSMetaEnhancer/issues/69) +- support `ISOMERIC_SMILES` and `CANONICAL_SMILES` in PubChem instead of generic `SMILES` [#67](https://github.com/RECETOX/MSMetaEnhancer/issues/67) ## [0.1.2] - 2022-01-06 + ### Added + - `generate_options()` function in `Galaxy` submodule to create all possible conversions supported by the tool in a format suitable for the galaxy tool form [#58](https://github.com/RECETOX/MSMetaEnhancer/pull/58) - monitoring of services status during annotation process [#56](https://github.com/RECETOX/MSMetaEnhancer/issues/56) - validation of obtained metadata [#59](https://github.com/RECETOX/MSMetaEnhancer/issues/59) + ### Changed + - structure and contents of documentation [#51](https://github.com/RECETOX/MSMetaEnhancer/pull/51) + ### Removed + - tests checking contents and consistency of individual services [#54](https://github.com/RECETOX/MSMetaEnhancer/pull/61) ## [0.1.1] - 2021-12-07 + ### Added + - `get_conversion_functions` on the level of `Converter` + ### Changed + - computation of all available jobs in `Application` + ### Removed + - `get_all_conversions` on the level of `Annotator` ## [0.1.0] - 2021-11-16 + ### Added + - Added conda environment files [#35](https://github.com/RECETOX/MSMetaEnhancer/pull/35) - Usage of IDSM SPARQL for PubChem service [#25](https://github.com/RECETOX/MSMetaEnhancer/pull/25) - Added logging and quantitative progress of annotation process [#22](https://github.com/RECETOX/MSMetaEnhancer/pull/22) diff --git a/MSMetaEnhancer/app.py b/MSMetaEnhancer/app.py index c3a6a25..d8e0a47 100644 --- 
a/MSMetaEnhancer/app.py +++ b/MSMetaEnhancer/app.py @@ -8,7 +8,7 @@ from MSMetaEnhancer.libs.data import Spectra, DataFrame from MSMetaEnhancer.libs.utils import logger from MSMetaEnhancer.libs.utils.ConverterBuilder import ConverterBuilder -from MSMetaEnhancer.libs.utils.Errors import UnknownSpectraFormat +from MSMetaEnhancer.libs.utils.Errors import UnknownFileFormat from MSMetaEnhancer.libs.utils.Job import convert_to_jobs from MSMetaEnhancer.libs.utils.Monitor import Monitor @@ -27,10 +27,10 @@ def load_data(self, filename, file_format): """ if file_format in ['msp', 'mgf', 'json']: self.data = Spectra() - elif file_format in ['csv', 'tsv', 'xlsx']: + elif file_format in ['csv', 'tsv', 'tabular', 'xlsx']: self.data = DataFrame() else: - raise UnknownSpectraFormat(f'Format {file_format} not supported.') + raise UnknownFileFormat(f'Format {file_format} not supported.') self.data.load_data(filename, file_format) def save_data(self, filename, file_format): diff --git a/MSMetaEnhancer/libs/Curator.py b/MSMetaEnhancer/libs/Curator.py index 0e5e33c..7c833f8 100644 --- a/MSMetaEnhancer/libs/Curator.py +++ b/MSMetaEnhancer/libs/Curator.py @@ -1,6 +1,8 @@ from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import ( is_valid_smiles, is_valid_inchi, is_valid_inchikey ) +from MSMetaEnhancer.libs.utils.Errors import InvalidAttributeFormat + # Example usage smiles = "C1=CC=CC=C1" @@ -10,7 +12,6 @@ print(is_valid_smiles(smiles)) # True if valid SMILES print(is_valid_inchi(inchi)) # True if valid InChI print(is_valid_inchikey(inchikey)) # True if valid InChIKey -from MSMetaEnhancer.libs.utils.Errors import InvalidAttributeFormat class Curator: diff --git a/MSMetaEnhancer/libs/converters/compute/RDKit.py b/MSMetaEnhancer/libs/converters/compute/RDKit.py index 2f4f2df..1762a02 100644 --- a/MSMetaEnhancer/libs/converters/compute/RDKit.py +++ b/MSMetaEnhancer/libs/converters/compute/RDKit.py @@ -2,6 +2,7 @@ from rdkit.Chem.Descriptors import ExactMolWt from 
rdkit.Chem import MolFromSmiles, MolToSmiles from rdkit.Chem.inchi import MolFromInchi +from rdkit.Chem.rdMolDescriptors import CalcMolFormula from rdkit.Chem import Atom @@ -68,3 +69,31 @@ def formula_to_mw(self, formula): multiplier = int(parts[index + 1]) if len(parts) > index + 1 and parts[index + 1].isnumeric() else 1 mass += atom.GetMass() * multiplier return {'mw': mass} + + def smiles_to_formula(self, smiles: str) -> dict: + """ + Compute molecular formula from SMILES. + + :param smiles: given SMILES + :return: computed molecular formula + """ + mol = MolFromSmiles(smiles) + if mol is None: + return {'formula': ''} + + formula = CalcMolFormula(mol) + + return {'formula': formula} + + def inchi_to_formula(self, inchi: str) -> dict: + """ + Compute molecular formula from InChI. + + :param inchi: given InChI + :return: computed molecular formula + """ + mol = MolFromInchi(inchi) + if mol is None: + return {'formula': ''} + formula = CalcMolFormula(mol) + return {'formula': formula} diff --git a/MSMetaEnhancer/libs/converters/web/WebConverter.py b/MSMetaEnhancer/libs/converters/web/WebConverter.py index b4fbd90..d5736e2 100644 --- a/MSMetaEnhancer/libs/converters/web/WebConverter.py +++ b/MSMetaEnhancer/libs/converters/web/WebConverter.py @@ -26,7 +26,7 @@ def __init__(self, session: aiohttp.ClientSession): """ super().__init__() self.session: aiohttp.ClientSession = session - self.endpoints = dict() + self.endpoints = {} async def convert(self, source: str, target: str, data: Union[str, int, float]): """Convert data from source attribute to target attribute. 
@@ -46,7 +46,7 @@ async def convert(self, source: str, target: str, data: Union[str, int, float]): if result: return result else: - raise TargetAttributeNotRetrieved(f'No data retrieved.') + raise TargetAttributeNotRetrieved('No data retrieved.') @lru_cache async def query_the_service(self, service: str, args: str, method: str = 'GET', data=None, headers=None) -> str: @@ -81,7 +81,7 @@ async def make_request(self, url, method, data, headers): :return: obtained response """ if headers is None: - headers = dict() + headers = {} if method == 'GET': async with self.session.get(url, headers=headers) as response: return await self.process_request(response, url, method) diff --git a/MSMetaEnhancer/libs/data/DataFrame.py b/MSMetaEnhancer/libs/data/DataFrame.py index 8ad4f3d..a6c06aa 100644 --- a/MSMetaEnhancer/libs/data/DataFrame.py +++ b/MSMetaEnhancer/libs/data/DataFrame.py @@ -1,7 +1,7 @@ import pandas from MSMetaEnhancer.libs.data.Data import Data -from MSMetaEnhancer.libs.utils.Errors import UnknownSpectraFormat +from MSMetaEnhancer.libs.utils.Errors import UnknownFileFormat class DataFrame(Data): @@ -12,17 +12,19 @@ def load_data(self, filename: str, file_format: str): """ Loads given file as a list of pandas DataFrame. 
- Supported formats: csv, tsv, xlsx + Supported formats: csv, tsv/tabular, xlsx :param filename: given file :param file_format: format of the input file """ if file_format == 'csv': self.df = pandas.read_csv(filename, dtype=str) - elif file_format == 'tsv': + elif file_format in ['tsv', 'tabular']: self.df = pandas.read_csv(filename, dtype=str, sep='\t') - else: + elif file_format == 'xlsx': self.df = pandas.read_excel(filename, dtype=str) + else: + raise UnknownFileFormat(f'Format {file_format} not supported.') def save_data(self, filename: str, file_format: str): """ @@ -35,12 +37,12 @@ def save_data(self, filename: str, file_format: str): """ if file_format == 'csv': self.df.to_csv(filename, index=False) - elif file_format == 'tsv': + elif file_format in ['tsv', 'tabular']: self.df.to_csv(filename, index=False, sep='\t') elif file_format == 'xlsx': self.df.to_excel(filename) else: - raise UnknownSpectraFormat(f'Format {file_format} not supported.') + raise UnknownFileFormat(f'Format {file_format} not supported.') def get_metadata(self): return self.df.to_dict('records') diff --git a/MSMetaEnhancer/libs/data/Spectra.py b/MSMetaEnhancer/libs/data/Spectra.py index 9a2638e..94bf1e4 100644 --- a/MSMetaEnhancer/libs/data/Spectra.py +++ b/MSMetaEnhancer/libs/data/Spectra.py @@ -4,7 +4,7 @@ import matchms.importing from MSMetaEnhancer.libs.data.Data import Data -from MSMetaEnhancer.libs.utils.Errors import UnknownSpectraFormat +from MSMetaEnhancer.libs.utils.Errors import UnknownFileFormat class Spectra(Data): @@ -45,7 +45,7 @@ def save_data(self, filename: str, file_format: str): try: getattr(matchms.exporting, f'save_as_{file_format}')(self.spectrums, filename) except Exception: - raise UnknownSpectraFormat(f'Format {file_format} not supported.') + raise UnknownFileFormat(f'Format {file_format} not supported.') def get_metadata(self): return [spectra.metadata for spectra in self.spectrums] diff --git a/MSMetaEnhancer/libs/utils/ConverterBuilder.py 
b/MSMetaEnhancer/libs/utils/ConverterBuilder.py index 15960cb..072eb1c 100644 --- a/MSMetaEnhancer/libs/utils/ConverterBuilder.py +++ b/MSMetaEnhancer/libs/utils/ConverterBuilder.py @@ -1,6 +1,7 @@ +# The wildcard imports below are required: build_converters resolves converter classes by name via eval(). -from MSMetaEnhancer.libs.converters.web import * +from MSMetaEnhancer.libs.converters.web import *  # noqa: F401,F403 from MSMetaEnhancer.libs.converters.web import __all__ as web_converters -from MSMetaEnhancer.libs.converters.compute import * +from MSMetaEnhancer.libs.converters.compute import *  # noqa: F401,F403 from MSMetaEnhancer.libs.converters.compute import __all__ as compute_converters from MSMetaEnhancer.libs.utils.Errors import UnknownConverter @@ -29,7 +30,7 @@ def build_converters(session, converters: list): :param converters: list of converters to be built :return: built converters """ - built_web_converters, built_converters = dict(), dict() + built_web_converters, built_converters = {}, {} for converter in converters: if converter in web_converters: built_web_converters[converter] = eval(converter)(session) diff --git a/MSMetaEnhancer/libs/utils/Errors.py b/MSMetaEnhancer/libs/utils/Errors.py index 194b5a3..ba18d79 100644 --- a/MSMetaEnhancer/libs/utils/Errors.py +++ b/MSMetaEnhancer/libs/utils/Errors.py @@ -10,7 +10,8 @@ class UnknownConverter(Exception): pass -class UnknownSpectraFormat(Exception): +class UnknownFileFormat(Exception): + """Format not supported.""" pass diff --git a/docs/source/conf.py b/docs/source/conf.py index a33009e..8014eb3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,7 +15,6 @@ sys.path.insert(0, os.path.abspath('../../')) -import MSMetaEnhancer from shutil import copyfile copyfile('../../README.md', 'readme.md') copyfile('../../CHANGELOG.md', 'CHANGELOG.md') diff --git a/pyproject.toml b/pyproject.toml index a371707..12b00bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "msmetaenhancer" -version = "0.4.1" +version = "0.5.0" description = "Repository for tool that adds more annotations (e.g. SMILES, InChI, CAS number) to MSP files (Python version)."
authors = [ "Helge Hecht ", @@ -14,7 +14,7 @@ packages = [ [tool.poetry.dependencies] python = ">=3.10,<3.13" -matchms = ">=0.28.2" +matchms = ">=0.30.0" pandas = "^2.2.1" scipy = "^1.12.0" requests = "^2.31.0" @@ -35,7 +35,9 @@ pytest-cov = "^4.1.0" pytest-aiohttp = "^1.0.5" pytest-dependency = "^0.6.0" myst-parser = "^2.0.0" -flake8 = "^7.0.0" +ruff = "^0.11.13" +pre-commit = "^4.2.0" +flake8 = "^7.2.0" [tool.poetry.group.docs.dependencies] diff --git a/tests/test_IDSM.py b/tests/test_IDSM.py index 7b19748..25c6d95 100644 --- a/tests/test_IDSM.py +++ b/tests/test_IDSM.py @@ -5,7 +5,6 @@ from MSMetaEnhancer.libs.converters.web import IDSM from frozendict import frozendict -from MSMetaEnhancer.libs.utils.Errors import UnknownResponse from tests.utils import wrap_with_session diff --git a/tests/test_io.py b/tests/test_io.py index 535864e..a2a1063 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,5 +1,4 @@ import pytest -import pandas import mock from MSMetaEnhancer.libs.data import Spectra, DataFrame @@ -33,12 +32,7 @@ def test_get_metadata(backend, file_type, filename): f"Value mismatch for key '{key}' at index {i}: {meta_item[key]} != {data_item[key]}" ) -@pytest.mark.parametrize('filename, sep', [ - ['tests/test_data/sample_metadata.csv', ','], - ['tests/test_data/sample_metadata.tsv', '\t'], -]) -def test_fuse_metadata_dataframe(filename, sep): - pandas_df = pandas.read_csv(filename, dtype=str, sep=sep) +def test_fuse_metadata_dataframe(): df = DataFrame() df.fuse_metadata(DATA) # Compare row by row, ignoring mismatched keys @@ -68,3 +62,24 @@ def test_fuse_metadata_spectra(): assert fused_item[key] == loaded_item[key], ( f"Value mismatch for key '{key}' at index {i}: {fused_item[key]} != {loaded_item[key]}" ) + + +def test_tabular_data(): + """ + Test loading and comparing tabular (TSV) data using the DataFrame backend. 
+ """ + df = DataFrame() + filename = 'tests/test_data/sample_metadata.tsv' + file_type = 'tabular' + df.load_data(filename, file_type) + metadata = df.get_metadata() + + # Compare lengths + assert len(metadata) == len(DATA), f"Metadata length mismatch: {len(metadata)} != {len(DATA)}" + # Compare values of matching keys + for i, (meta_item, data_item) in enumerate(zip(metadata, DATA)): + for key in meta_item.keys(): + if key in data_item: + assert meta_item[key] == data_item[key], ( + f"Value mismatch for key '{key}' at index {i}: {meta_item[key]} != {data_item[key]}" + ) diff --git a/tests/test_rdkit.py b/tests/test_rdkit.py index 4873a79..fdc2ffb 100644 --- a/tests/test_rdkit.py +++ b/tests/test_rdkit.py @@ -13,7 +13,9 @@ 'isomeric_smiles': 'C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@@]43C)[C@@H]1CC[C@@H]2O' }], ['from_smiles', CANONICAL_SMILES, {'mw': 288.208930136}], - ["formula_to_mw", "C9H15N4O8P", {'mw': 338.21299999999997}] + ["formula_to_mw", "C9H15N4O8P", {'mw': 338.21299999999997}], + ['smiles_to_formula', CANONICAL_SMILES, {'formula': 'C19H28O2'}], + ['inchi_to_formula', INCHI, {'formula': 'C19H28O2'}], ]) def test_convert_methods(method, input, expected): func = getattr(RDKit(), method)