From 712e41fa0a99f038b76990a87f34d81eca00a7b3 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Tue, 22 Jul 2025 00:19:38 +0100 Subject: [PATCH 1/4] CU-8699wjhfu: make client available and publishable --- .github/workflows/ci.yml | 36 ++- .github/workflows/qa.yml | 42 ++- .github/workflows/release.yml | 45 ++- client/README.md | 88 ++++++ client/__init__.py | 0 client/mctclient.py | 547 +++++++++++++++++++++++++++++++++ client/pyproject.toml | 18 ++ client/tests/test_mctclient.py | 119 +++++++ 8 files changed, 892 insertions(+), 3 deletions(-) create mode 100644 client/README.md create mode 100644 client/__init__.py create mode 100644 client/mctclient.py create mode 100644 client/pyproject.toml create mode 100644 client/tests/test_mctclient.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad8070fa..1c737e66 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,10 +3,44 @@ name: ci-build on: [push] jobs: - # run tests / lint / etc. before building container image? + # Test and build client library + test-client: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v4 + with: + ref: ${{ github.ref }} + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests pytest + + - name: Install client package in development mode + run: | + cd client + pip install -e . 
+ + - name: Run client tests + run: | + cd client + python -m pytest tests/ -v + + - name: Build client package + run: | + cd client + python -m build + # Build and test webapp container build-and-push: runs-on: ubuntu-latest + needs: test-client steps: - name: Checkout main uses: actions/checkout@v4 diff --git a/.github/workflows/qa.yml b/.github/workflows/qa.yml index f7df2837..9e71a032 100644 --- a/.github/workflows/qa.yml +++ b/.github/workflows/qa.yml @@ -5,10 +5,50 @@ on: branches: [ main ] jobs: - # run tests / lint / etc. before building container image? + # Test and build client library + test-client: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v4 + with: + ref: 'main' + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests pytest build + + - name: Install client package in development mode + run: | + cd client + pip install -e . + + - name: Run client tests + run: | + cd client + python -m pytest tests/ -v + + - name: Build client package + run: | + cd client + python -m build + + - name: Publish dev distribution to Test PyPI + uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + # Build and test webapp container build-and-push: runs-on: ubuntu-latest + needs: test-client steps: - name: Checkout main uses: actions/checkout@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56f94e42..8d7b79d7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,10 +5,53 @@ on: tags: ["v*.*.*"] jobs: - # run tests / lint / etc. before building container image? 
+ # Test, build and publish client library + test-and-publish-client: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v4 + with: + ref: "main" + + - name: Release Tag + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests pytest build twine + + - name: Install client package in development mode + run: | + cd client + pip install -e . + + - name: Run client tests + run: | + cd client + python -m pytest tests/ -v + + - name: Build client package + run: | + cd client + python -m build + + - name: Publish production distribution to PyPI + if: startsWith(github.ref, 'refs/tags') && ! github.event.release.prerelease + uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + # Build and test webapp container build-and-push: runs-on: ubuntu-latest + needs: test-and-publish-client steps: - name: Checkout main uses: actions/checkout@v4 diff --git a/client/README.md b/client/README.md new file mode 100644 index 00000000..d5d13132 --- /dev/null +++ b/client/README.md @@ -0,0 +1,88 @@ + +--- + +# MedCATtrainer Client + +A Python client for interacting with a MedCATTrainer web application instance. This package allows you to manage datasets, concept databases, vocabularies, model packs, users, projects, and more via Python code or the command line. 
+ +## Features + +- Manage datasets, concept databases, vocabularies, and model packs +- Create and manage users and projects +- Retrieve and upload project annotations +- Command-line interface (CLI) for automation + +## Installation + +```sh +pip install mctclient +``` + +Or, if installing from source: + +```sh +cd client +python -m build +pip install dist/*.whl +``` + +## Python Usage + +```sh +export MCTRAINER_USERNAME=<username> +export MCTRAINER_PASSWORD=<password> +``` + +```python +from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject + +# Connect to your MedCATTrainer instance +session = MedCATTrainerSession(server="http://localhost:8001") + +# List all projects +projects = session.get_projects() +for project in projects: + print(project) + +# Create a new dataset +dataset = session.create_dataset(name="My Dataset", dataset_file="path/to/data.csv") + +# Create a new user +user = session.create_user(username="newuser", password="password123") + +# Create a new project +project = session.create_project( + name="My Project", + description="A new annotation project", + members=[user], + dataset=dataset +) +``` + +### MedCATTrainerSession Methods + +- `create_project(name, description, members, dataset, cuis=[], cuis_file=None, concept_db=None, vocab=None, cdb_search_filter=None, modelpack=None, meta_tasks=[], rel_tasks=[])` +- `create_dataset(name, dataset_file)` +- `create_user(username, password)` +- `create_medcat_model(cdb, vocab)` +- `create_medcat_model_pack(model_pack)` +- `get_users()` +- `get_models()` +- `get_model_packs()` +- `get_meta_tasks()` +- `get_rel_tasks()` +- `get_projects()` +- `get_datasets()` +- `get_project_annos(projects)` + +Each method returns the corresponding object or a list of objects. + +## License + +This project is licensed under the Apache 2.0 License. + +## Contributing + +Pull requests are welcome! 
For major changes, please open an issue first to discuss what you would like to change. + + diff --git a/client/__init__.py b/client/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/client/mctclient.py b/client/mctclient.py new file mode 100644 index 00000000..2d4b0370 --- /dev/null +++ b/client/mctclient.py @@ -0,0 +1,547 @@ +from dataclasses import dataclass +import json +import os +from abc import ABC +from typing import List, Tuple, Union + +import requests + +import logging + +logger = logging.getLogger(__name__) + + +@dataclass +class MCTObj(ABC): + id: str=None + + def valid(self): + return self.id is not None + + +@dataclass +class MCTDataset(MCTObj): + """A dataset in the MedCATTrainer instance. + + Attributes: + name (str): The name of the dataset. + dataset_file (str): The path to the dataset file, can be a csv, or excel file, with at + least 2 columns: 'name': unique identifier for each text, and 'text': the text to be annotated. + """ + name: str=None + dataset_file: str=None + + def __str__(self): + return f'{self.id} : {self.name} \t {self.dataset_file}' + + +@dataclass +class MCTConceptDB(MCTObj): + """A concept database in the MedCATTrainer instance. + + Attributes: + name (str): The name of the concept database. Name must start with a lowercase letter and contain only alphanumeric characters and underscores. + conceptdb_file (str): The path to the concept database file, should be a .dat file. + use_for_training (bool): Whether to use the concept database for training. Defaults to True as most uploaded CDBs will be used for training, unless they are used for the concept search lookup. 
+ """ + name: str=None + conceptdb_file: str=None + use_for_training: bool=True + + def __post_init__(self): + if self.name is not None: + if not self.name[0].islower(): + raise ValueError("Name must start with a lowercase letter") + if not self.name.replace('_', '').replace('-', '').isalnum(): + raise ValueError("Name must contain only alphanumeric characters and underscores") + + def __str__(self): + return f'{getattr(self, "id", "N/A")} : {self.name} \t {self.conceptdb_file}' + + +@dataclass +class MCTVocab(MCTObj): + """A vocabulary in the MedCATTrainer instance. + + Attributes: + name (str): The name of the vocabulary. + vocab_file (str): The path to the vocabulary file, should be a .dat file. + """ + name: str=None + vocab_file: str=None + + def __str__(self): + return f'{self.id} : {self.vocab_file}' + + +@dataclass +class MCTModelPack(MCTObj): + """A model pack in the MedCATTrainer instance. + + Attributes: + name (str): The name of the model pack. + model_pack_zip (str): The path to the model pack zip file, should be a .zip file. + """ + name: str=None + model_pack_zip: str=None + + def __str__(self): + return f'{self.id} : {self.name} \t {self. model_pack_zip}' + + +@dataclass +class MCTMetaTask(MCTObj): + """A meta task in the MedCATTrainer instance. + + Attributes: + name (str): The name of the meta task. + """ + name: str=None + + def __str__(self): + return f'{self.id} : {self.name}' + + +@dataclass +class MCTRelTask(MCTObj): + """A relation extraction task in the MedCATTrainer instance. + + Attributes: + name (str): The name of the relation extraction task. + """ + name: str=None + + def __str__(self): + return f'{self.id} : {self.name}' + + +@dataclass +class MCTUser(MCTObj): + """A user in the MedCATTrainer instance. + + Attributes: + username (str): The username of the user. 
+ """ + username: str=None + + def __str__(self): + return f'{self.id} : {self.username}' + + +@dataclass +class MCTProject(MCTObj): + """A project in the MedCATTrainer instance. + + Attributes: + name (str): The name of the project. + description (str): The description of the project. + cuis (str): The CUIs to be used in the project filter. + dataset (MCTDataset): The dataset to be used in the project. + concept_db (MCTConceptDB): The concept database to be used in the project. + vocab (MCTVocab): The vocabulary to be used in the project. + members (List[MCTUser]): The annotators for the project. + meta_tasks (List[MCTMetaTask]): The meta tasks for the project. + rel_tasks (List[MCTRelTask]): The relation extraction tasks for the project. + """ + name: str=None + description: str=None + cuis: str=None + dataset: MCTDataset=None + concept_db: MCTConceptDB=None + vocab: MCTVocab=None + members: List[MCTUser]=None + meta_tasks: List[MCTMetaTask]=None + rel_tasks: List[MCTRelTask]=None + + def __str__(self): + return f'{self.id} : {self.name} \t {self.description} \t {self.dataset}' + + + +class MedCATTrainerSession: + """Wrapper for the MedCATTrainer API. + This class provides a wrapper around the MedCATTrainer API, allowing for easy creation of projects, datasets, users, and models. + + Attributes: + server (str): The server to connect to can also be set by an ENVVAR MCTRAINER_SERVER. Defaults to http://localhost:8001. + username (str): The username to connect to can also be set by an ENVVAR MCTRAINER_USERNAME. + password (str): The password to connect to can also be set by an ENVVAR MCTRAINER_PASSWORD. + + Example: + Create a project with a concept database, vocabulary, dataset, and user. + + >>> session = MedCATTrainerSession() + >>> ds = session.create_dataset(name='Test DS', dataset_file='.csv') + >>> cdb_file = '/cdb.dat' + >>> vocab_file = '/vocab.dat' + >>> model_pack_zip = '.zip' + >>> # Create a concept database and vocabulary in the MCTrainer instance. 
This is the NER+L model only. + >>> cdb, vocab = session.create_medcat_model(MCTConceptDB(name='test_cdb', conceptdb_file=cdb_file), + MCTVocab(name='test_vocab', vocab_file=vocab_file)) + >>> # OR Create a model pack in the MCTrainer instance, NER+L, plus any MetaCAT or RelCAT models packaged together. + >>> session.create_medcat_model_pack(MCTModelPack(name='test_model_pack', model_pack_zip=model_pack_zip)) + >>> session.create_project(name='test-project', description='test-description', members=[MCTUser(username='test-user')], dataset=ds, concept_db=cdb, vocab=vocab) + + A common interaction would be to create a project with a new dataset but existing concept database and vocabulary or Modelpack. + >>> projects = session.get_projects() + >>> ds = session.create_dataset(name='New Test DS', dataset_file='/Users/tom/phd/MedCATtrainer/notebook_docs/example_data/cardio.csv') + >>> # MCTObjects can be referenced by name or by the wrapper object. + >>> session.create_project(name='test-project', description='test-description', members=[MCTUser(username='test-user')], dataset=ds, + concept_db=MCTConceptDB(name='test_cdb'), vocab=MCTVocab(name='test_vocab')) + + To download annotations for a project: + >>> projects = session.get_projects() + >>> annotations = session.get_project_annos(projects[0]) + """ + + def __init__(self, server=None, username=None, password=None): + """Initialize the MedCATTrainerSession. + + Args: + server (_type_, optional): _description_. Defaults to None. 
+ + Raises: + MCTUtilsException: _description_ + """ + self.username = username or os.getenv("MCTRAINER_USERNAME") + self.password = password or os.getenv("MCTRAINER_PASSWORD") + self.server = server or 'http://localhost:8001' + + payload = {"username": self.username, "password": self.password} + resp = requests.post(f"{self.server}/api/api-token-auth/", json=payload) + if 200 <= resp.status_code < 300: + token = json.loads(resp.text)["token"] + self.headers = { + 'Authorization': f'Token {token}', + } + else: + raise MCTUtilsException(f'Failed to login to MedCATtrainer instance running at: {self.server}') + + def create_project(self, name: str, + description: str, + members: Union[List[MCTUser], List[str]], + dataset: Union[MCTDataset, str], + cuis: List[str]=[], + cuis_file: str=None, + concept_db: Union[MCTConceptDB, str]=None, + vocab: Union[MCTVocab, str]=None, + cdb_search_filter: Union[MCTConceptDB, str]=None, + modelpack: Union[MCTModelPack, str]=None, + meta_tasks: Union[List[MCTMetaTask], List[str]]=[], + rel_tasks: Union[List[MCTRelTask], List[str]]=[]): + """Create a new project in the MedCATTrainer session. + Users, models, datasets etc. can be referred to by either their client wrapper object or their name, and the ID will be retrieved + then used to create the project. Most names have a unique constraint on them so for the majority of cases will not results in an error. + + Only a concept_db and vocab pair, or a modelpack needs to be specified. + + Setting a modelpack will also eventually automatically select meta tasks and rel tasks. + + Args: + name (str): The name of the project. + description (str): The description of the project. + members (Union[List[MCTUser], List[str]]): The annotators for the project. + dataset (Union[MCTDataset, str]): The dataset to be used in the project. + cuis (List[str]): The CUIs to be used in the project filter. 
+ cuis_file (str): The file containing the CUIs to be used in the project filter, will be appended to the cuis list. + concept_db (Union[MCTConceptDB, str], optional): The concept database to be used in the project. Defaults to None. + vocab (Union[MCTVocab, str], optional): The vocabulary to be used in the project. Defaults to None. + cdb_search_filter (Union[MCTConceptDB, str], optional): _description_. Defaults to None. + modelpack (Union[MCTModelPack, str], optional): _description_. Defaults to None. + meta_tasks (Union[List[MCTMetaTask], List[str]], optional): _description_. Defaults to None. + rel_tasks (Union[List[MCTRelTask], List[str]], optional): _description_. Defaults to None. + + Raises: + MCTUtilsException: If the project creation fails + + Returns: + MCTProject: The created project + """ + + if all(isinstance(m, str) for m in members): + mct_members = [u for u in self.get_users() if u.username in members] + if len(mct_members) != len(members): + raise MCTUtilsException(f'Not all users found in MedCATTrainer instance: {members} requested, trainer members found: {mct_members}') + else: + members = mct_members + + if isinstance(dataset, str): + try: + dataset = [d for d in self.get_datasets() if d.name == dataset].pop() + except IndexError: + raise MCTUtilsException(f'Dataset not found in MedCATTrainer instance: {dataset}') + + if isinstance(concept_db, str): + try: + concept_db = [c for c in self.get_models()[0] if c.name == concept_db].pop() + except IndexError: + raise MCTUtilsException(f'Concept DB not found in MedCATTrainer instance: {concept_db}') + + if isinstance(vocab, str): + try: + vocab = [v for v in self.get_models()[1] if v.name == vocab].pop() + except IndexError: + raise MCTUtilsException(f'Vocab not found in MedCATTrainer instance: {vocab}') + + if isinstance(cdb_search_filter, str): + try: + cdb_search_filter = [c for c in self.get_concept_dbs() if c.name == cdb_search_filter].pop() + except IndexError: + raise 
MCTUtilsException(f'Concept DB not found in MedCATTrainer instance: {cdb_search_filter}') + + if isinstance(modelpack, str): + try: + modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop() + except IndexError: + raise MCTUtilsException(f'Model pack not found in MedCATTrainer instance: {modelpack}') + + if all(isinstance(m, str) for m in meta_tasks): + mct_meta_tasks = [m for m in self.get_meta_tasks() if m.name in meta_tasks] + if len(mct_meta_tasks) != len(meta_tasks): + raise MCTUtilsException(f'Not all meta tasks found in MedCATTrainer instance: {meta_tasks} requested, trainer meta tasks found: {mct_meta_tasks}') + else: + meta_tasks = mct_meta_tasks + + if all(isinstance(r, str) for r in rel_tasks): + mct_rel_tasks = [r for r in self.get_rel_tasks() if r.name in rel_tasks] + if len(mct_rel_tasks) != len(rel_tasks): + raise MCTUtilsException(f'Not all rel tasks found in MedCATTrainer instance: {rel_tasks} requested, trainer rel tasks found: {mct_rel_tasks}') + else: + rel_tasks = mct_rel_tasks + + if (concept_db or vocab) and modelpack: + raise MCTUtilsException('Cannot specify both concept_db/vocab and modelpack') + + payload = { + 'name': name, + 'description': description, + 'cuis': ','.join(cuis), + 'dataset': dataset.id, + 'members': [m.id for m in members], + 'tasks': [mt.id for mt in meta_tasks], + 'relations': [rt.id for rt in rel_tasks] + } + + if concept_db and vocab: + payload['concept_db'] = concept_db.id + payload['vocab'] = vocab.id + elif modelpack: + payload['model_pack'] = modelpack.id + + if cdb_search_filter: + payload['cdb_search_filter'] = [cdb_search_filter.id] + + if cuis_file: + with open(cuis_file, 'rb') as f: + resp = requests.post(f'{self.server}/api/project-annotate-entities/', data=payload, files={'cuis_file': f}, headers=self.headers) + else: + resp = requests.post(f'{self.server}/api/project-annotate-entities/', data=payload, headers=self.headers) + if 200 <= resp.status_code < 300: + resp_json = 
json.loads(resp.text) + return MCTProject(id=resp_json['id'], name=name, description=description, cuis=cuis, + dataset=dataset, concept_db=concept_db, vocab=vocab, members=members, + meta_tasks=meta_tasks, rel_tasks=rel_tasks) + else: + raise MCTUtilsException(f'Failed to create project with name: {name}', resp.text) + + def create_dataset(self, name: str, dataset_file: str): + """Create a new dataset in the MedCATTrainer session. + + Args: + name (str): The name of the dataset. + dataset_file (str): The path to the dataset file. + + Raises: + MCTUtilsException: If the dataset creation fails + + Returns: + MCTDataset: The created dataset + """ + resp = requests.post(f'{self.server}/api/datasets/', headers=self.headers, + data={'name': name}, + files={'original_file': open(dataset_file, 'rb')}) + if 200 <= resp.status_code < 300: + resp_json = json.loads(resp.text) + return MCTDataset(name=name, id=resp_json['id']) + else: + raise MCTUtilsException(f'Failed to create dataset with name: {name}', resp.text) + + def create_user(self, username: str, password): + """Create a new user in the MedCATTrainer session. + + Args: + username (str): The username of the new user. + password (str): The password of the new user. + + Raises: + MCTUtilsException: If the user creation fails + + Returns: + MCTUser: The created user + """ + payload = { + 'username': username, + 'password': password + } + resp = requests.post(f'{self.server}/api/users/', json=payload, headers=self.headers) + if 200 <= resp.status_code < 300: + resp_json = json.loads(resp.text) + return MCTUser(username=username, id=resp_json['id']) + else: + raise MCTUtilsException(f'Failed to create new user with username: {username}', resp.text) + + def create_medcat_model(self, cdb:MCTConceptDB, vocab: MCTVocab): + """Create a new MedCAT cdb and vocab model in the MedCATTrainer session. + + Args: + cdb (MCTConceptDB): The concept database to be created. + vocab (MCTVocab): The vocabulary to be created. 
+ + Raises: + MCTUtilsException: If the model creation fails + """ + resp = requests.post(f'{self.server}/api/concept-dbs/', headers=self.headers, + data={'name': cdb.name, 'use_for_training': cdb.use_for_training}, + files={'cdb_file': open(cdb.conceptdb_file, 'rb')}) + if 200 <= resp.status_code < 300: + resp_json = json.loads(resp.text) + cdb.id = resp_json['id'] + else: + raise MCTUtilsException(f'Failed uploading MedCAT cdb model: {cdb}', resp.text) + + resp = requests.post(f'{self.server}/api/vocabs/', headers=self.headers, + data={'name': vocab.name}, + files={'vocab_file': open(vocab.vocab_file, 'rb')}) + if 200 <= resp.status_code < 300: + resp_json = json.loads(resp.text) + vocab.id = resp_json['id'] + else: + raise MCTUtilsException(f'Failed uploading MedCAT vocab model: {vocab}', resp.text) + + return cdb, vocab + + def create_medcat_model_pack(self, model_pack: MCTModelPack): + """Create a new MedCAT model pack in the MedCATTrainer session. + + Args: + model_pack (MCTModelPack): The model pack to be created. + + Raises: + MCTUtilsException: If the model pack creation fails + """ + resp = requests.post(f'{self.server}/api/modelpacks/', headers=self.headers, + data={'name': model_pack.name}, + files={'model_pack': open(model_pack.model_pack_zip, 'rb')}) + if 200 <= resp.status_code < 300: + resp_json = json.loads(resp.text) + model_pack.id = resp_json['id'] + else: + raise MCTUtilsException(f'Failed uploading model pack: {model_pack.model_pack_zip}', resp.text) + + def get_users(self) -> List[MCTUser]: + """Get all users in the MedCATTrainer instance. + + Returns: + List[MCTUser]: A list of all users in the MedCATTrainer instance + """ + users = json.loads(requests.get(f'{self.server}/api/users/', headers=self.headers).text)['results'] + return [MCTUser(id=u['id'], username=u['username']) for u in users] + + def get_models(self) -> Tuple[List[str], List[str]]: + """Get all MedCAT cdb and vocab models in the MedCATTrainer instance. 
+ + Returns: + Tuple[List[MCTConceptDB], List[MCTVocab]]: A tuple of lists of all MedCAT cdb and vocab models in the MedCATTrainer instance + """ + cdbs = json.loads(requests.get(f'{self.server}/api/concept-dbs/', headers=self.headers).text)['results'] + vocabs = json.loads(requests.get(f'{self.server}/api/vocabs/', headers=self.headers).text)['results'] + mct_cdbs = [MCTConceptDB(id=cdb['id'], name=cdb['name'], conceptdb_file=cdb['cdb_file']) for cdb in cdbs] + mct_vocabs = [MCTVocab(id=v['id'], name=v['name'], vocab_file=v['vocab_file']) for v in vocabs] + return mct_cdbs, mct_vocabs + + def get_model_packs(self) -> List[MCTModelPack]: + """Get all MedCAT model packs in the MedCATTrainer instance. + + Returns: + List[MCTModelPack]: A list of all MedCAT model packs in the MedCATTrainer instance + """ + resp = json.loads(requests.get(f'{self.server}/api/modelpacks/', headers=self.headers).text)['results'] + mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack']) for mp in resp] + return mct_model_packs + + def get_meta_tasks(self) -> List[MCTMetaTask]: + """Get all MedCAT meta tasks that have been created in the MedCATTrainer instance. + + Returns: + List[MCTMetaTask]: A list of all MedCAT meta tasks in the MedCATTrainer instance + """ + resp = json.loads(requests.get(f'{self.server}/api/meta-tasks/', headers=self.headers).text)['results'] + mct_meta_tasks = [MCTMetaTask(name=mt['name'], id=mt['id']) for mt in resp] + return mct_meta_tasks + + def get_rel_tasks(self) -> List[MCTRelTask]: + """Get all MedCAT relation tasks that have been created in the MedCATTrainer instance. 
+ + Returns: + List[MCTRelTask]: A list of all MedCAT relation tasks in the MedCATTrainer instance + """ + resp = json.loads(requests.get(f'{self.server}/api/relations/', headers=self.headers).text)['results'] + mct_rel_tasks = [MCTRelTask(name=rt['label'], id=rt['id']) for rt in resp] + return mct_rel_tasks + + def get_projects(self) -> List[MCTProject]: + """Get all MedCAT annotation projects that have been created in the MedCATTrainer instance. + + Returns: + List[MCTProject]: A list of all MedCAT annotation projects in the MedCATTrainer instance + """ + resp = json.loads(requests.get(f'{self.server}/api/project-annotate-entities/', headers=self.headers).text)['results'] + mct_projects = [MCTProject(id=p['id'], name=p['name'], description=p['description'], cuis=p['cuis'], + dataset=MCTDataset(id=p['id']), + concept_db=MCTConceptDB(id=p['concept_db']), + vocab=MCTVocab(id=p['vocab']), + members=[MCTUser(id=u) for u in p['members']], + meta_tasks=[MCTMetaTask(id=mt) for mt in p['tasks']], + rel_tasks=[MCTRelTask(id=rt) for rt in p['relations']]) for p in resp] + return mct_projects + + def get_datasets(self) -> List[MCTDataset]: + """Get all datasets that have been created in the MedCATTrainer instance. + + Returns: + List[MCTDataset]: A list of all datasets in the MedCATTrainer instance + """ + resp = json.loads(requests.get(f'{self.server}/api/datasets/', headers=self.headers).text)['results'] + mct_datasets = [MCTDataset(name=d['name'], dataset_file=d['original_file'], id=d['id']) for d in resp] + return mct_datasets + + def get_project_annos(self, projects: List[MCTProject]): + """Get the annotations for a list of projects. 
Schema is documented here: https://github.com/medcat/MedCATtrainer/blob/main/docs/api.md#download-annotations + + Args: + projects (List[MCTProject]): A list of projects to get annotations for + + Returns: + List[MCTProject]: A list of all projects with annotations + """ + if any(p.id is None for p in projects): + raise MCTUtilsException('One or more project.id are None and all are required to download annotations') + + resp = json.loads(requests.get(f'{self.server}/api/download-annos/?project_ids={",".join([str(p.id) for p in projects])}&with_text=1', + headers=self.headers).text) + return resp + + def __str__(self) -> str: + return f'{self.server} \t {self.username} \t {self.password}' + + +class MCTUtilsException(Exception): + """Base exception for MedCAT Trainer API errors""" + def __init__(self, message, original_exception=None): + self.message = message + self.original_exception = original_exception + super().__init__(self.message) + + def __str__(self): + return f'{self.message} \n {self.original_exception}' + diff --git a/client/pyproject.toml b/client/pyproject.toml new file mode 100644 index 00000000..6ac6a684 --- /dev/null +++ b/client/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "medcattrainer-client" +version = "1.0.0" +description = "Python client for interacting with a MedCATTrainer instance" +readme = "client/README.md" +requires-python = ">=3.10" +license = { file = "LICENSE" } +authors = [{ name = "Tom Searle", email = "tom@cogstack.org" }] +dependencies = ["requests"] + +[project.urls] +Homepage = "https://github.com/CogStack/MedCATtrainer/" +Documentation = "https://medcattrainer.readthedocs.io/en/latest/" +Source = "https://github.com/CogStack/MedCATtrainer/" diff --git a/client/tests/test_mctclient.py b/client/tests/test_mctclient.py new file mode 100644 index 00000000..c06b8ae6 --- /dev/null +++ b/client/tests/test_mctclient.py @@ -0,0 
+1,119 @@ +import json +import unittest +from unittest.mock import patch, MagicMock +from mctclient import ( + MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject +) + +class TestMCTClient(unittest.TestCase): + + @patch('mctclient.requests.post') + @patch('mctclient.requests.get') + def test_session_get_projects(self, mock_get, mock_post): + # Mock authentication + mock_post.return_value = MagicMock(status_code=200, text='{"token": "abc"}') + # Mock get_projects with a real project structure + mock_project = { + "id": 1, + "name": "Test Project", + "description": "A test project", + "cuis": "C001,C002", + "dataset": 10, + "concept_db": 20, + "vocab": 30, + "members": [100, 101], + "tasks": [200], + "relations": [300] + } + mock_get.return_value = MagicMock( + status_code=200, + text=json.dumps({"results": [mock_project]}) + ) + session = MedCATTrainerSession(server='http://localhost', username='u', password='p') + projects = session.get_projects() + self.assertIsInstance(projects, list) + self.assertEqual(len(projects), 1) + project = projects[0] + self.assertIsInstance(project, MCTProject) + self.assertEqual(project.name, "Test Project") + self.assertEqual(project.description, "A test project") + self.assertEqual(project.cuis, "C001,C002") + self.assertIsInstance(project.dataset, MCTDataset) + self.assertIsInstance(project.concept_db, MCTConceptDB) + self.assertIsInstance(project.vocab, MCTVocab) + self.assertTrue(all(isinstance(m, MCTUser) for m in project.members)) + self.assertTrue(all(isinstance(mt, MCTMetaTask) for mt in project.meta_tasks)) + self.assertTrue(all(isinstance(rt, MCTRelTask) for rt in project.rel_tasks)) + + @patch('mctclient.requests.post') + def test_create_project(self, mock_post): + # Mock authentication + def post_side_effect(url, *args, **kwargs): + if url.endswith('/api/api-token-auth/'): + return MagicMock(status_code=200, text='{"token": "abc"}') + elif 
url.endswith('/api/project-annotate-entities/'): + # Return a response with all fields needed for MCTProject + return MagicMock( + status_code=200, + text=json.dumps({ + 'id': '3', + 'name': 'My Project', + 'description': 'desc', + 'cuis': 'C001,C002', + 'dataset': '2', + 'concept_db': '20', + 'vocab': '30', + 'members': ['1'], + 'tasks': ['200'], + 'relations': ['300'] + }), + json=lambda: { + 'id': '3', + 'name': 'My Project', + 'description': 'desc', + 'cuis': 'C001,C002', + 'dataset': '2', + 'concept_db': '20', + 'vocab': '30', + 'members': ['1'], + 'tasks': ['200'], + 'relations': ['300'] + } + ) + else: + return MagicMock(status_code=404, text='') + + mock_post.side_effect = post_side_effect + + session = MedCATTrainerSession(server='http://localhost', username='u', password='p') + user = MCTUser(id='1', username='testuser') + dataset = MCTDataset(id='2', name='TestDS', dataset_file='file.csv') + concept_db = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat') + vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat') + meta_task = MCTMetaTask(id='200', name='TestMetaTask') + rel_task = MCTRelTask(id='300', name='TestRelTask') + + project = session.create_project( + name='My Project', + description='desc', + cuis='C001,C002', + members=[user], + dataset=dataset, + concept_db=concept_db, + vocab=vocab, + meta_tasks=[meta_task], + rel_tasks=[rel_task] + ) + self.assertIsInstance(project, MCTProject) + self.assertEqual(project.name, 'My Project') + self.assertEqual(project.description, 'desc') + self.assertEqual(project.cuis, 'C001,C002') + self.assertIsInstance(project.dataset, MCTDataset) + self.assertIsInstance(project.concept_db, MCTConceptDB) + self.assertIsInstance(project.vocab, MCTVocab) + self.assertEqual(project.members, [user]) + self.assertEqual(project.meta_tasks, [meta_task]) + self.assertEqual(project.rel_tasks, [rel_task]) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 
af516f823128ccae77eba36092cd393c9405d3c8 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Tue, 22 Jul 2025 10:38:34 +0100 Subject: [PATCH 2/4] CU-8699wjhfu: add build dep --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1c737e66..b0a00cab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install requests pytest + pip install requests pytest build - name: Install client package in development mode run: | From d30278d17d08349bd6495cbd7ae4d2a457a5e014 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Tue, 22 Jul 2025 14:33:24 +0100 Subject: [PATCH 3/4] CU-8699wjhfu: add client docs to docs site --- docs/client.md | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 3 +- 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 docs/client.md diff --git a/docs/client.md b/docs/client.md new file mode 100644 index 00000000..d5d13132 --- /dev/null +++ b/docs/client.md @@ -0,0 +1,88 @@ + +--- + +# MedCATtrainer Client + +A Python client for interacting with a MedCATTrainer web application instance. This package allows you to manage datasets, concept databases, vocabularies, model packs, users, projects, and more via Python code or the command line. 
+ +## Features + +- Manage datasets, concept databases, vocabularies, and model packs +- Create and manage users and projects +- Retrieve and upload project annotations +- Command-line interface (CLI) for automation + +## Installation + +```sh +pip install mctclient +``` + +Or, if installing from source: + +```sh +cd client +python -m build +pip install dist/*.whl +``` + +## Python Usage + +```sh +export MCTRAINER_USERNAME= +export MCTRAINER_PASSWORD= +``` + +```python +from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject + +# Connect to your MedCATTrainer instance +session = MedCATTrainerSession(server="http://localhost:8001") + +# List all projects +projects = session.get_projects() +for project in projects: + print(project) + +# Create a new dataset +dataset = session.create_dataset(name="My Dataset", dataset_file="path/to/data.csv") + +# Create a new user +user = session.create_user(username="newuser", password="password123") + +# Create a new project +project = session.create_project( + name="My Project", + description="A new annotation project", + members=[user], + dataset=dataset +) +``` + +### MedCATTrainerSession Methods + +- `create_project(name, description, members, dataset, cuis=[], cuis_file=None, concept_db=None, vocab=None, cdb_search_filter=None, modelpack=None, meta_tasks=[], rel_tasks=[])` +- `create_dataset(name, dataset_file)` +- `create_user(username, password)` +- `create_medcat_model(cdb, vocab)` +- `create_medcat_model_pack(model_pack)` +- `get_users()` +- `get_models()` +- `get_model_packs()` +- `get_meta_tasks()` +- `get_rel_tasks()` +- `get_projects()` +- `get_datasets()` +- `get_project_annos(projects)` + +Each method returns the corresponding object or a list of objects. + +## License + +This project is licensed under the Apache 2.0 License. + +## Contributing + +Pull requests are welcome! 
For major changes, please open an issue first to discuss what you would like to change. + + diff --git a/docs/index.rst b/docs/index.rst index 46f3841e..b7758d22 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,7 +19,8 @@ Welcome to MedCATtrainer's documentation! annotator_guide.md meta_annotations.md advanced_usage.md - maintanence.md + maintenance.md + client.md Indices and tables From 18df430c3c5a6c45519a5bfad202d77784d1699f Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Tue, 22 Jul 2025 14:53:34 +0100 Subject: [PATCH 4/4] CU-8699wjhfu: add example .ipyb example of client API --- notebook_docs/Client_API_Tutorials.ipynb | 485 +++++++++++++++++++++++ 1 file changed, 485 insertions(+) create mode 100644 notebook_docs/Client_API_Tutorials.ipynb diff --git a/notebook_docs/Client_API_Tutorials.ipynb b/notebook_docs/Client_API_Tutorials.ipynb new file mode 100644 index 00000000..3cecbdcb --- /dev/null +++ b/notebook_docs/Client_API_Tutorials.ipynb @@ -0,0 +1,485 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Client API Tutorial\n", + "\n", + "This notebook demonstrates how to use the `MedCATTrainerSession` class to interact with the MedCATTrainer API. We'll cover:\n", + "\n", + "1. Setting up a MedCATTrainer session\n", + "2. Exploring available resources (users, datasets, models)\n", + "3. Creating new resources (datasets, models, users)\n", + "4. Creating annotation projects with different approaches\n", + "5. Downloading and saving annotations\n", + "\n", + "These steps provide a complete workflow for programmatically managing medical text annotation projects with MedCATTrainer." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__SETUP:__\n", + "\n", + "You need to have [MedCATtrainer service running locally](http://localhost:8001/)\n", + "\n", + "The default credentials after setup are:\n", + "\n", + "```bash\n", + "username: admin\n", + "password: admin\n", + "```\n", + "\n", + "The administrative console can be found here: http://localhost:8001/admin/\n", + "\n", + "Within this admin console is where you can manually interact with the MedCATtrainer program and set up projects\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Setup and Authentication\n", + "\n", + "First, let's import the necessary classes and set up our session:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json\n", + "import sys\n", + "sys.path.append('../client')\n", + "from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTUser, MCTProject" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the session\n", + "\n", + "# Set environment variables for authentication. These are the defaults and are optional.\n", + "os.environ['MCTRAINER_USERNAME'] = 'admin'\n", + "os.environ['MCTRAINER_PASSWORD'] = 'admin'\n", + "mct_server = 'http://localhost:8001' # Default server is http://localhost:8001 if not specified\n", + "# session = MedCATTrainerSession()\n", + "\n", + "# Initialize the session and change explicit arguments if required.\n", + "session = MedCATTrainerSession(server=mct_server, username='admin', password='admin') # Wrapper for the MedCATTrainer API." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2.
Explore Available Resources\n", + "\n", + "Let's check what resources are already available in the MedCATTrainer instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Users:\n", + "3 : annotator2\n", + "2 : annotator1\n", + "1 : admin\n", + "\n", + "Datasets:\n", + "1 : Example Dataset \t http://localhost:8001/media/Example_Dataset.csv\n", + "2 : Neurology Notes \t http://localhost:8001/media/neurology_notes.csv\n", + "3 : SG-example-docs \t http://localhost:8001/media/sg-sample-docs.csv\n", + "\n", + "Concept DBs:\n", + "1 : umls_cdb \t http://localhost:8001/media/cdb.dat\n", + "2 : snomed_cdb \t http://localhost:8001/media/snomed-cdb.dat\n", + "3 : snomed_2022_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_494c3717f637bb89/cdb.dat\n", + "8 : medcat_full_pack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_model_pack_u3fB9G5/cdb.dat\n", + "12 : snomed-2023-bert-metacats_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/cdb.dat\n", + "13 : de_id_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_deid_trained_a7120281ebb9fc9e/cdb.dat\n", + "\n", + "Vocabularies:\n", + "1 : http://localhost:8001/media/vocab.dat\n", + "3 : http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89/vocab.dat\n", + "12 : http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/vocab.dat\n", + "\n", + "ModelPacks:\n", + "1 : snomed_2022_modelpack \t http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89.zip\n", + "9 : snomed-2023-bert-metacats \t 
http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a.zip\n", + "10 : de-id modelpack \t http://localhost:8001/media/medcat_deid_trained_a7120281ebb9fc9e.zip\n", + "\n", + "Meta Tasks:\n", + "1 : Experiencer\n", + "2 : Presence\n", + "3 : Subject\n", + "4 : Temporality\n", + "5 : Time\n", + "\n", + "Relation Tasks:\n", + "1 : Spatial\n" + ] + } + ], + "source": [ + "# Get users\n", + "users = session.get_users()\n", + "print(\"Users:\")\n", + "for user in users:\n", + " print(user)\n", + "print()\n", + "\n", + "# Get datasets\n", + "datasets = session.get_datasets()\n", + "print(\"Datasets:\")\n", + "for dataset in datasets:\n", + " print(dataset)\n", + "print()\n", + "\n", + "# Get concept databases and vocabularies\n", + "concept_dbs, vocabs = session.get_models()\n", + "print(\"Concept DBs:\")\n", + "for cdb in concept_dbs:\n", + " print(cdb)\n", + "print()\n", + "print(\"Vocabularies:\")\n", + "for vocab in vocabs:\n", + " print(vocab)\n", + "print()\n", + "\n", + "# Get modelpacks\n", + "model_packs = session.get_model_packs()\n", + "print(\"ModelPacks:\")\n", + "for model_pack in model_packs:\n", + " print(model_pack)\n", + "print()\n", + "\n", + "# Get meta tasks\n", + "meta_tasks = session.get_meta_tasks()\n", + "print(\"Meta Tasks:\")\n", + "for i, task in enumerate(meta_tasks):\n", + " print(f\"{i+1} : {task.name}\")\n", + "print()\n", + "\n", + "# Get relation tasks\n", + "rel_tasks = session.get_rel_tasks()\n", + "print(\"Relation Tasks:\")\n", + "for i, task in enumerate(rel_tasks):\n", + " print(f\"{i+1} : {task.name}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Upload new resources to MedCATtrainer\n", + "\n", + "Before we create a project we need to create and upload all the required resources. 
We'll start with a dataset:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a new dataset to be annotated.\n", + "neurology_dataset = session.create_dataset(\n", + " name=\"Neurology Notes\", # Names must be unique\n", + " dataset_file=\"./example_data/neuro.csv\" # This csv should have at least these 2 columns: [\"name\", \"text\"]\n", + ")\n", + "print(f\"Created dataset: {neurology_dataset}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Creating MedCAT Models\n", + "\n", + "We have two options for creating models:\n", + "\n", + "1. Upload separate CDB and Vocab files\n", + "2. Upload a complete model pack ZIP\n", + "\n", + "Let's explore both approaches:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# If you don't have these MedCAT components or a model pack, you can download an example here:\n", + "# Download vocab.dat\n", + "!wget -O ./example_data/vocab.dat https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat\n", + "# Download snomed-cdb-mc-v1.cdb\n", + "!wget -O ./example_data/snomed-cdb-mc-v1.cdb https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/snomed-cdb-mc-v1.cdb\n", + "# Download model pack (this is a zip file)\n", + "!wget -O ./example_data/medcat_model_pack.zip https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/medcat_model_pack_c4e0d25701ce4e88.zip\n", + "\n", + "# Otherwise, skip this cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Option 1: Upload separate CDB and Vocab files\n", + "example_cdb = MCTConceptDB(name=\"example_cdbv1\", conceptdb_file=\"./example_data/snomed-cdb-mc-v1.cdb\")\n", + "example_vocab = MCTVocab(name=\"example_vocabv2\",
vocab_file=\"./example_data/vocab.dat\")\n", + "\n", + "# Create the model in the MedCATTrainer instance\n", + "cdb, vocab = session.create_medcat_model(example_cdb, example_vocab)\n", + "print(f\"Created CDB: {cdb}\")\n", + "print(f\"Created Vocab: {vocab}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Option 2: Upload a complete modelpack ZIP\n", + "# This contains CDB, Vocab, and potentially MetaCAT and RelCAT models\n", + "medcat_model_pack = MCTModelPack(\n", + " name=\"medcat_full_pack\",\n", + " model_pack_zip=\"./medcat_model_pack.zip\"\n", + ")\n", + "session.create_medcat_model_pack(medcat_model_pack)\n", + "print(f\"Created model pack: {medcat_model_pack}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.2 Creating a New User\n", + "\n", + "If we need to add an annotator to our project:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "new_user = session.create_user(username=\"annotator1\", password=\"secure_password\")\n", + "print(f\"Created user: {new_user}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Creating Annotation Projects\n", + "\n", + "Now we can create annotation projects using our resources:\n", + "\n", + "But first, Let's check again what resources are now available in the MedCATTrainer instance after Part 3:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get users\n", + "users = session.get_users()\n", + "print(\"Users:\")\n", + "for user in users:\n", + " print(user)\n", + "print()\n", + "\n", + "# Get datasets\n", + "datasets = session.get_datasets()\n", + "print(\"Datasets:\")\n", + "for dataset in datasets:\n", + " print(dataset)\n", + "print()\n", + "\n", + "# Get concept databases and vocabularies\n", + "concept_dbs, vocabs = session.get_models()\n", + "print(\"Concept DBs:\")\n", + "for cdb in concept_dbs:\n", + " print(cdb)\n", + "print()\n", + "print(\"Vocabularies:\")\n", + "for vocab in vocabs:\n", + " print(vocab)\n", + "print()\n", + "\n", + "# Get modelpacks\n", + "model_packs = session.get_model_packs()\n", + "print(\"ModelPacks:\")\n", + "for model_pack in model_packs:\n", + " print(model_pack)\n", + "print()\n", + "\n", + "# Get meta tasks\n", + "meta_tasks = session.get_meta_tasks()\n", + "print(\"Meta Tasks:\")\n", + "for i, task in enumerate(meta_tasks):\n", + " print(f\"{i+1} : {task.name}\")\n", + "print()\n", + "\n", + "# Get relation tasks\n", + "rel_tasks = session.get_rel_tasks()\n", + "print(\"Relation Tasks:\")\n", + "for i, task in enumerate(rel_tasks):\n", + " print(f\"{i+1} : {task.name}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 1: Create a project with separate CDB and Vocab\n", + "neuro_project = session.create_project(\n", + " name=\"Neurology Annotation Project\",\n", + " description=\"Demo annotation project of neurology conditions, epilepsy & seizure\",\n", + " members=[user for user in users], # Add all users...\n", + " dataset=datasets[-1],\n", + " 
concept_db=concept_dbs[-1],\n", + " vocab=vocabs[-1],\n", + " cuis=[\"84757009\", \"91175000\"], # Whitelist Filter CUIs/concepts\n", + " #meta_tasks=[\"Temporality\", \"Certainty\"], # Can specify by name or by object\n", + " #rel_tasks=[\"Has_Finding\"] # only add this relational extraction task if absolutely required\n", + ")\n", + "\n", + "print(f\"Created project: {neuro_project}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Method 2: Create a project with a modelpack\n", + "\n", + "# Rerun the explore resources to run the following code:\n", + "general_project = session.create_project(\n", + " name=\"Demo General Medical Annotation\",\n", + " description=\"Annotation of neurology medical conditions\",\n", + " members=[user for user in users], # All users\n", + " dataset=datasets[-1], # Use existing dataset\n", + " modelpack=model_packs[-1], # Use existing model pack\n", + " # cuis_file=\"./resources/mct_filter.json\", # Load whitelist concepts from a file [\"concept1\", \"concept2\"]\n", + ")\n", + "\n", + "print(f\"Created project with model pack: {general_project}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Retrieving Project Annotations\n", + "\n", + "After annotators have worked on the projects, we can download the annotations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get all projects\n", + "mct_projects = session.get_projects()\n", + "\n", + "# Download annotations for all projects\n", + "projects = session.get_project_annos(mct_projects)\n", + "\n", + "print(f\"Downloaded annotations for {len(mct_projects)} projects:\")\n", + "for p in projects['projects']:\n", + " print(p['name'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Inspect all details from a single export\n", + "projects['projects'][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Saving Annotations for Analysis\n", + "\n", + "Finally, let's save the annotations to a file for later analysis:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save MCT export / annotations to a file\n", + "with open(\"./example_data/medical_annotations.json\", \"w\") as f:\n", + " json.dump(projects, f, indent=2)\n", + "\n", + "print(\"Annotations saved to ./example_data/medical_annotations.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# End of Tutorial" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "bioext-medcat-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}