From 712e41fa0a99f038b76990a87f34d81eca00a7b3 Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 22 Jul 2025 00:19:38 +0100
Subject: [PATCH 1/4] CU-8699wjhfu: make client available and publishable

---
 .github/workflows/ci.yml       |  36 ++-
 .github/workflows/qa.yml       |  42 ++-
 .github/workflows/release.yml  |  45 ++-
 client/README.md               |  88 ++++++
 client/__init__.py             |   0
 client/mctclient.py            | 547 +++++++++++++++++++++++++++++++++
 client/pyproject.toml          |  18 ++
 client/tests/test_mctclient.py | 119 +++++++
 8 files changed, 892 insertions(+), 3 deletions(-)
 create mode 100644 client/README.md
 create mode 100644 client/__init__.py
 create mode 100644 client/mctclient.py
 create mode 100644 client/pyproject.toml
 create mode 100644 client/tests/test_mctclient.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ad8070fa..1c737e66 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,10 +3,44 @@ name: ci-build
 on: [push]
 
 jobs:
-  # run tests / lint / etc. before building container image?
+  # Test and build client library
+  test-client:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.ref }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies  # 'build' is needed by the later "python -m build" step
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests pytest build
+
+      - name: Install client package in development mode
+        run: |
+          cd client
+          pip install -e .
+
+      - name: Run client tests
+        run: |
+          cd client
+          python -m pytest tests/ -v
+
+      - name: Build client package
+        run: |
+          cd client
+          python -m build
 
+  # Build and test webapp container
   build-and-push:
     runs-on: ubuntu-latest
+    needs: test-client
     steps:
       - name: Checkout main
         uses: actions/checkout@v4
diff --git a/.github/workflows/qa.yml b/.github/workflows/qa.yml
index f7df2837..9e71a032 100644
--- a/.github/workflows/qa.yml
+++ b/.github/workflows/qa.yml
@@ -5,10 +5,50 @@ on:
     branches: [ main ]
 
 jobs:
-  # run tests / lint / etc. before building container image?
+  # Test and build client library
+  test-client:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+        with:
+          ref: 'main'
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests pytest build
+
+      - name: Install client package in development mode
+        run: |
+          cd client
+          pip install -e .
+
+      - name: Run client tests
+        run: |
+          cd client
+          python -m pytest tests/ -v
+
+      - name: Build client package  # output to repo-root dist/, the publish action's default packages_dir
+        run: |
+          cd client
+          python -m build --outdir ../dist
+
+      - name: Publish dev distribution to Test PyPI
+        uses: pypa/gh-action-pypi-publish@v1.4.2
+        with:
+          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+          repository_url: https://test.pypi.org/legacy/
 
+  # Build and test webapp container
   build-and-push:
     runs-on: ubuntu-latest
+    needs: test-client
     steps:
       - name: Checkout main
         uses: actions/checkout@v4
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 56f94e42..8d7b79d7 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -5,10 +5,53 @@ on:
     tags: ["v*.*.*"]
 
 jobs:
-  # run tests / lint / etc. before building container image?
+  # Test, build and publish client library
+  test-and-publish-client:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+        with:
+          ref: "main"
+
+      - name: Release Tag
+        run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests pytest build twine
+
+      - name: Install client package in development mode
+        run: |
+          cd client
+          pip install -e .
+
+      - name: Run client tests
+        run: |
+          cd client
+          python -m pytest tests/ -v
+
+      - name: Build client package  # output to repo-root dist/, the publish action's default packages_dir
+        run: |
+          cd client
+          python -m build --outdir ../dist
+
+      - name: Publish production distribution to PyPI
+        if: startsWith(github.ref, 'refs/tags') && ! github.event.release.prerelease
+        uses: pypa/gh-action-pypi-publish@v1.4.2
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
 
+  # Build and test webapp container
   build-and-push:
     runs-on: ubuntu-latest
+    needs: test-and-publish-client
     steps:
       - name: Checkout main
         uses: actions/checkout@v4
diff --git a/client/README.md b/client/README.md
new file mode 100644
index 00000000..d5d13132
--- /dev/null
+++ b/client/README.md
@@ -0,0 +1,88 @@
+
+---
+
+# MedCATtrainer Client
+
+A Python client for interacting with a MedCATTrainer web application instance. This package allows you to manage datasets, concept databases, vocabularies, model packs, users, projects, and more via Python code or the command line.
+
+## Features
+
+- Manage datasets, concept databases, vocabularies, and model packs
+- Create and manage users and projects
+- Retrieve and upload project annotations
+- Command-line interface (CLI) for automation
+
+## Installation
+
+```sh
+pip install medcattrainer-client
+```
+
+Or, if installing from source:
+
+```sh
+cd client
+python -m build
+pip install dist/*.whl
+```
+
+## Python Usage
+
+```sh
+export MCTRAINER_USERNAME=<username>
+export MCTRAINER_PASSWORD=<password>
+```
+
+```python
+from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject
+
+# Connect to your MedCATTrainer instance
+session = MedCATTrainerSession(server="http://localhost:8001")
+
+# List all projects
+projects = session.get_projects()
+for project in projects:
+    print(project)
+
+# Create a new dataset
+dataset = session.create_dataset(name="My Dataset", dataset_file="path/to/data.csv")
+
+# Create a new user
+user = session.create_user(username="newuser", password="password123")
+
+# Create a new project
+project = session.create_project(
+    name="My Project",
+    description="A new annotation project",
+    members=[user],
+    dataset=dataset
+)
+```
+
+### MedCATTrainerSession Methods
+
+- `create_project(name, description, members, dataset, cuis=[], cuis_file=None, concept_db=None, vocab=None, cdb_search_filter=None, modelpack=None, meta_tasks=[], rel_tasks=[])`
+- `create_dataset(name, dataset_file)`
+- `create_user(username, password)`
+- `create_medcat_model(cdb, vocab)`
+- `create_medcat_model_pack(model_pack)`
+- `get_users()`
+- `get_models()`
+- `get_model_packs()`
+- `get_meta_tasks()`
+- `get_rel_tasks()`
+- `get_projects()`
+- `get_datasets()`
+- `get_project_annos(projects)`
+
+Each method returns the corresponding object or a list of objects.
+
+## License
+
+This project is licensed under the Apache 2.0 License.
+
+## Contributing
+
+Pull requests are welcome! For major changes, please open an issue first to discuss what you would like to change.
+
+
diff --git a/client/__init__.py b/client/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/client/mctclient.py b/client/mctclient.py
new file mode 100644
index 00000000..2d4b0370
--- /dev/null
+++ b/client/mctclient.py
@@ -0,0 +1,547 @@
+from dataclasses import dataclass
+import json
+import os
+from abc import ABC
+from typing import List, Tuple, Union
+
+import requests
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class MCTObj(ABC):
+    id: str=None
+
+    def valid(self):
+        return self.id is not None
+
+
+@dataclass
+class MCTDataset(MCTObj):
+    """A dataset in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the dataset.
+        dataset_file (str): The path to the dataset file, can be a csv, or excel file, with at
+            least 2 columns: 'name': unique identifier for each text, and 'text': the text to be annotated.
+    """
+    name: str=None
+    dataset_file: str=None
+
+    def __str__(self):
+        return f'{self.id} : {self.name} \t {self.dataset_file}'
+
+
+@dataclass
+class MCTConceptDB(MCTObj):
+    """A concept database in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the concept database. Name must start with a lowercase letter and contain only alphanumeric characters and underscores.
+        conceptdb_file (str): The path to the concept database file, should be a <conceptdb_name>.dat file.
+        use_for_training (bool): Whether to use the concept database for training. Defaults to True as most uploaded CDBs will be used for training, unless they are used for the concept search lookup.
+    """
+    name: str=None
+    conceptdb_file: str=None
+    use_for_training: bool=True
+
+    def __post_init__(self):
+        if self.name is not None:
+            if not self.name[0].islower():
+                raise ValueError("Name must start with a lowercase letter")
+            if not self.name.replace('_', '').replace('-', '').isalnum():
+                raise ValueError("Name must contain only alphanumeric characters and underscores")
+
+    def __str__(self):
+        return f'{getattr(self, "id", "N/A")} : {self.name} \t {self.conceptdb_file}'
+
+
+@dataclass
+class MCTVocab(MCTObj):
+    """A vocabulary in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the vocabulary.
+        vocab_file (str): The path to the vocabulary file, should be a <vocab_name>.dat file.
+    """
+    name: str=None
+    vocab_file: str=None
+
+    def __str__(self):
+        return f'{self.id} : {self.name} \t {self.vocab_file}'
+
+
+@dataclass
+class MCTModelPack(MCTObj):
+    """A model pack in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the model pack.
+        model_pack_zip (str): The path to the model pack zip file, should be a <modelpack_name>.zip file.
+    """
+    name: str=None
+    model_pack_zip: str=None
+
+    def __str__(self):
+        return f'{self.id} : {self.name} \t {self. model_pack_zip}'
+
+
+@dataclass
+class MCTMetaTask(MCTObj):
+    """A meta task in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the meta task.
+    """
+    name: str=None
+
+    def __str__(self):
+        return f'{self.id} : {self.name}'
+
+
+@dataclass
+class MCTRelTask(MCTObj):
+    """A relation extraction task in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the relation extraction task.
+    """
+    name: str=None
+
+    def __str__(self):
+        return f'{self.id} : {self.name}'
+
+
+@dataclass
+class MCTUser(MCTObj):
+    """A user in the MedCATTrainer instance.
+
+    Attributes:
+        username (str): The username of the user.
+    """
+    username: str=None
+
+    def __str__(self):
+        return f'{self.id} : {self.username}'
+
+
+@dataclass
+class MCTProject(MCTObj):
+    """A project in the MedCATTrainer instance.
+
+    Attributes:
+        name (str): The name of the project.
+        description (str): The description of the project.
+        cuis (str): The CUIs to be used in the project filter.
+        dataset (MCTDataset): The dataset to be used in the project.
+        concept_db (MCTConceptDB): The concept database to be used in the project.
+        vocab (MCTVocab): The vocabulary to be used in the project.
+        members (List[MCTUser]): The annotators for the project.
+        meta_tasks (List[MCTMetaTask]): The meta tasks for the project.
+        rel_tasks (List[MCTRelTask]): The relation extraction tasks for the project.
+    """
+    name: str=None
+    description: str=None
+    cuis: str=None
+    dataset: MCTDataset=None
+    concept_db: MCTConceptDB=None
+    vocab: MCTVocab=None
+    members: List[MCTUser]=None
+    meta_tasks: List[MCTMetaTask]=None
+    rel_tasks: List[MCTRelTask]=None
+
+    def __str__(self):
+        return f'{self.id} : {self.name} \t {self.description} \t {self.dataset}'
+
+
+
+class MedCATTrainerSession:
+    """Wrapper for the MedCATTrainer API.
+    This class provides a wrapper around the MedCATTrainer API, allowing for easy creation of projects, datasets, users, and models.
+
+    Attributes:
+        server (str): The server to connect to can also be set by an ENVVAR MCTRAINER_SERVER. Defaults to http://localhost:8001.
+        username (str): The username to connect to can also be set by an ENVVAR MCTRAINER_USERNAME.
+        password (str): The password to connect to can also be set by an ENVVAR MCTRAINER_PASSWORD.
+
+    Example:
+        Create a project with a concept database, vocabulary, dataset, and user.
+
+    >>> session = MedCATTrainerSession()
+    >>> ds = session.create_dataset(name='Test DS', dataset_file='<path_to_dataset>.csv')
+    >>> cdb_file = '<model_pack_path>/cdb.dat'
+    >>> vocab_file = '<model_pack_path>/vocab.dat'
+    >>> model_pack_zip = '<model_pack_path>.zip'
+    >>> # Create a concept database and vocabulary in the MCTrainer instance. This is the NER+L model only.
+    >>> cdb, vocab = session.create_medcat_model(MCTConceptDB(name='test_cdb', conceptdb_file=cdb_file),
+                                             MCTVocab(name='test_vocab', vocab_file=vocab_file))
+    >>> # OR Create a model pack in the MCTrainer instance, NER+L, plus any MetaCAT or RelCAT models packaged together.
+    >>> session.create_medcat_model_pack(MCTModelPack(name='test_model_pack', model_pack_zip=model_pack_zip))
+    >>> session.create_project(name='test-project', description='test-description', members=[MCTUser(username='test-user')], dataset=ds, concept_db=cdb, vocab=vocab)
+
+        A common interaction would be to create a project with a new dataset but existing concept database and vocabulary or Modelpack.
+    >>> projects = session.get_projects()
+    >>> ds = session.create_dataset(name='New Test DS', dataset_file='<path_to_dataset>.csv')
+    >>> # MCTObjects can be referenced by name or by the wrapper object.
+    >>> session.create_project(name='test-project', description='test-description', members=[MCTUser(username='test-user')], dataset=ds,
+    concept_db=MCTConceptDB(name='test_cdb'), vocab=MCTVocab(name='test_vocab'))
+
+        To download annotations for a project:
+    >>> projects = session.get_projects()
+    >>> annotations = session.get_project_annos([projects[0]])
+    """
+
+    def __init__(self, server=None, username=None, password=None):
+        """Initialize the MedCATTrainerSession and log in to obtain an API token.
+
+        Args:
+            server (str, optional): Base URL of the instance. Falls back to MCTRAINER_SERVER, then http://localhost:8001.
+            username, password (str, optional): Credentials. Fall back to MCTRAINER_USERNAME / MCTRAINER_PASSWORD.
+        Raises:
+            MCTUtilsException: If the token request does not return a 2xx status.
+        """
+        self.username = username or os.getenv("MCTRAINER_USERNAME")
+        self.password = password or os.getenv("MCTRAINER_PASSWORD")
+        self.server = server or os.getenv("MCTRAINER_SERVER") or 'http://localhost:8001'
+
+        payload = {"username": self.username, "password": self.password}
+        resp = requests.post(f"{self.server}/api/api-token-auth/", json=payload)
+        if 200 <= resp.status_code < 300:
+            token = json.loads(resp.text)["token"]
+            self.headers = {
+                'Authorization': f'Token {token}',
+            }
+        else:
+            raise MCTUtilsException(f'Failed to login to MedCATtrainer instance running at: {self.server}')
+
+    def create_project(self, name: str,
+                       description: str,
+                       members: Union[List[MCTUser], List[str]],
+                       dataset: Union[MCTDataset, str],
+                       cuis: List[str]=[],
+                       cuis_file: str=None,
+                       concept_db: Union[MCTConceptDB, str]=None,
+                       vocab: Union[MCTVocab, str]=None,
+                       cdb_search_filter: Union[MCTConceptDB, str]=None,
+                       modelpack: Union[MCTModelPack, str]=None,
+                       meta_tasks: Union[List[MCTMetaTask], List[str]]=[],
+                       rel_tasks: Union[List[MCTRelTask], List[str]]=[]):
+        """Create a new project in the MedCATTrainer session.
+        Users, models, datasets etc. can be referred to by either their client wrapper object or their name, and the ID will be retrieved
+        then used to create the project. Most names have a unique constraint on them so for the majority of cases will not results in an error.
+
+        Only a concept_db and vocab pair, or a modelpack needs to be specified.
+
+        Setting a modelpack will also eventually automatically select meta tasks and rel tasks.
+
+        Args:
+            name (str): The name of the project.
+            description (str): The description of the project.
+            members (Union[List[MCTUser], List[str]]): The annotators for the project.
+            dataset (Union[MCTDataset, str]): The dataset to be used in the project.
+            cuis (List[str]): The CUIs to be used in the project filter.
+            cuis_file (str): The file containing the CUIs to be used in the project filter, will be appended to the cuis list.
+            concept_db (Union[MCTConceptDB, str], optional): The concept database to be used in the project. Defaults to None.
+            vocab (Union[MCTVocab, str], optional): The vocabulary to be used in the project. Defaults to None.
+            cdb_search_filter (Union[MCTConceptDB, str], optional): The concept database to use as the project's concept search filter. Defaults to None.
+            modelpack (Union[MCTModelPack, str], optional): The model pack to be used in the project, instead of a concept_db and vocab pair. Defaults to None.
+            meta_tasks (Union[List[MCTMetaTask], List[str]], optional): The meta tasks for the project. Defaults to [].
+            rel_tasks (Union[List[MCTRelTask], List[str]], optional): The relation extraction tasks for the project. Defaults to [].
+
+        Raises:
+            MCTUtilsException: If the project creation fails
+
+        Returns:
+            MCTProject: The created project
+        """
+
+        if all(isinstance(m, str) for m in members):
+            mct_members = [u for u in self.get_users() if u.username in members]
+            if len(mct_members) != len(members):
+                raise MCTUtilsException(f'Not all users found in MedCATTrainer instance: {members} requested, trainer members found: {mct_members}')
+            else:
+                members = mct_members
+
+        if isinstance(dataset, str):
+            try:
+                dataset = [d for d in self.get_datasets() if d.name == dataset].pop()
+            except IndexError:
+                raise MCTUtilsException(f'Dataset not found in MedCATTrainer instance: {dataset}')
+
+        if isinstance(concept_db, str):
+            try:
+                concept_db = [c for c in self.get_models()[0] if c.name == concept_db].pop()
+            except IndexError:
+                raise MCTUtilsException(f'Concept DB not found in MedCATTrainer instance: {concept_db}')
+
+        if isinstance(vocab, str):
+            try:
+                vocab = [v for v in self.get_models()[1] if v.name == vocab].pop()
+            except IndexError:
+                raise MCTUtilsException(f'Vocab not found in MedCATTrainer instance: {vocab}')
+
+        if isinstance(cdb_search_filter, str):
+            try:
+                cdb_search_filter = [c for c in self.get_models()[0] if c.name == cdb_search_filter].pop()  # get_models() -> (cdbs, vocabs)
+            except IndexError:
+                raise MCTUtilsException(f'Concept DB not found in MedCATTrainer instance: {cdb_search_filter}')
+
+        if isinstance(modelpack, str):
+            try:
+                modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop()
+            except IndexError:
+                raise MCTUtilsException(f'Model pack not found in MedCATTrainer instance: {modelpack}')
+
+        if all(isinstance(m, str) for m in meta_tasks):
+            mct_meta_tasks = [m for m in self.get_meta_tasks() if m.name in meta_tasks]
+            if len(mct_meta_tasks) != len(meta_tasks):
+                raise MCTUtilsException(f'Not all meta tasks found in MedCATTrainer instance: {meta_tasks} requested, trainer meta tasks found: {mct_meta_tasks}')
+            else:
+                meta_tasks = mct_meta_tasks
+
+        if all(isinstance(r, str) for r in rel_tasks):
+            mct_rel_tasks = [r for r in self.get_rel_tasks() if r.name in rel_tasks]
+            if len(mct_rel_tasks) != len(rel_tasks):
+                raise MCTUtilsException(f'Not all rel tasks found in MedCATTrainer instance: {rel_tasks} requested, trainer rel tasks found: {mct_rel_tasks}')
+            else:
+                rel_tasks = mct_rel_tasks
+
+        if (concept_db or vocab) and modelpack:
+            raise MCTUtilsException('Cannot specify both concept_db/vocab and modelpack')
+
+        payload = {
+            'name': name,
+            'description': description,
+            'cuis': ','.join(cuis),
+            'dataset': dataset.id,
+            'members': [m.id for m in members],
+            'tasks': [mt.id for mt in meta_tasks],
+            'relations': [rt.id for rt in rel_tasks]
+        }
+
+        if concept_db and vocab:
+            payload['concept_db'] = concept_db.id
+            payload['vocab'] = vocab.id
+        elif modelpack:
+            payload['model_pack'] = modelpack.id
+
+        if cdb_search_filter:
+            payload['cdb_search_filter'] = [cdb_search_filter.id]
+
+        if cuis_file:
+            with open(cuis_file, 'rb') as f:
+                resp = requests.post(f'{self.server}/api/project-annotate-entities/', data=payload, files={'cuis_file': f}, headers=self.headers)
+        else:
+            resp = requests.post(f'{self.server}/api/project-annotate-entities/', data=payload, headers=self.headers)
+        if 200 <= resp.status_code < 300:
+            resp_json = json.loads(resp.text)
+            return MCTProject(id=resp_json['id'], name=name, description=description, cuis=cuis,
+                              dataset=dataset, concept_db=concept_db, vocab=vocab, members=members,
+                              meta_tasks=meta_tasks, rel_tasks=rel_tasks)
+        else:
+            raise MCTUtilsException(f'Failed to create project with name: {name}', resp.text)
+
+    def create_dataset(self, name: str, dataset_file: str):
+        """Create a new dataset in the MedCATTrainer session by uploading a local file.
+
+        Args:
+            name (str): The name of the dataset.
+            dataset_file (str): The path to the dataset file; it is opened for the upload and closed afterwards.
+
+        Raises:
+            MCTUtilsException: If the dataset creation fails
+
+        Returns:
+            MCTDataset: The created dataset
+        """
+        with open(dataset_file, 'rb') as f:
+            resp = requests.post(f'{self.server}/api/datasets/', headers=self.headers,
+                                 data={'name': name}, files={'original_file': f})
+        if 200 <= resp.status_code < 300:
+            resp_json = json.loads(resp.text)
+            return MCTDataset(name=name, id=resp_json['id'])
+        else:
+            raise MCTUtilsException(f'Failed to create dataset with name: {name}', resp.text)
+
+    def create_user(self, username: str, password):
+        """Create a new user in the MedCATTrainer session.
+
+        Args:
+            username (str): The username of the new user.
+            password (str): The password of the new user.
+
+        Raises:
+            MCTUtilsException: If the user creation fails
+
+        Returns:
+            MCTUser: The created user
+        """
+        payload = {
+            'username': username,
+            'password': password
+        }
+        resp = requests.post(f'{self.server}/api/users/', json=payload, headers=self.headers)
+        if 200 <= resp.status_code < 300:
+            resp_json = json.loads(resp.text)
+            return MCTUser(username=username, id=resp_json['id'])
+        else:
+            raise MCTUtilsException(f'Failed to create new user with username: {username}', resp.text)
+
+    def create_medcat_model(self, cdb:MCTConceptDB, vocab: MCTVocab):
+        """Create a new MedCAT cdb and vocab model in the MedCATTrainer session.
+
+        Args:
+            cdb (MCTConceptDB): The concept database to be created.
+            vocab (MCTVocab): The vocabulary to be created.
+
+        Raises:
+            MCTUtilsException: If the model creation fails
+
+        Returns:
+            Tuple[MCTConceptDB, MCTVocab]: The same objects, with their ids set from the server responses
+        """
+        # Each file is opened in a with-block so the handle is closed once its upload finishes.
+        with open(cdb.conceptdb_file, 'rb') as f:
+            resp = requests.post(f'{self.server}/api/concept-dbs/', headers=self.headers, files={'cdb_file': f},
+                                 data={'name': cdb.name, 'use_for_training': cdb.use_for_training})
+        if not 200 <= resp.status_code < 300:
+            raise MCTUtilsException(f'Failed uploading MedCAT cdb model: {cdb}', resp.text)
+        cdb.id = json.loads(resp.text)['id']
+
+        with open(vocab.vocab_file, 'rb') as f:
+            resp = requests.post(f'{self.server}/api/vocabs/', headers=self.headers,
+                                 data={'name': vocab.name}, files={'vocab_file': f})
+        if not 200 <= resp.status_code < 300:
+            raise MCTUtilsException(f'Failed uploading MedCAT vocab model: {vocab}', resp.text)
+        vocab.id = json.loads(resp.text)['id']
+
+        return cdb, vocab
+
+    def create_medcat_model_pack(self, model_pack: MCTModelPack):
+        """Create a new MedCAT model pack in the MedCATTrainer session.
+
+        Args:
+            model_pack (MCTModelPack): The model pack to be created.
+        Raises:
+            MCTUtilsException: If the model pack creation fails
+        Returns:
+            MCTModelPack: The same object, with its id set from the server response
+        """
+        with open(model_pack.model_pack_zip, 'rb') as f:
+            resp = requests.post(f'{self.server}/api/modelpacks/', headers=self.headers,
+                                 data={'name': model_pack.name}, files={'model_pack': f})
+        if not 200 <= resp.status_code < 300:
+            raise MCTUtilsException(f'Failed uploading model pack: {model_pack.model_pack_zip}', resp.text)
+        model_pack.id = json.loads(resp.text)['id']
+        return model_pack
+
+    def get_users(self) -> List[MCTUser]:
+        """Get all users in the MedCATTrainer instance.
+
+        Returns:
+            List[MCTUser]: A list of all users in the MedCATTrainer instance
+        """
+        users = json.loads(requests.get(f'{self.server}/api/users/', headers=self.headers).text)['results']
+        return [MCTUser(id=u['id'], username=u['username']) for u in users]
+
+    def get_models(self) -> Tuple[List[MCTConceptDB], List[MCTVocab]]:
+        """Get all MedCAT cdb and vocab models in the MedCATTrainer instance.
+
+        Returns:
+            Tuple[List[MCTConceptDB], List[MCTVocab]]: A tuple of lists of all MedCAT cdb and vocab models in the MedCATTrainer instance
+        """
+        cdbs = json.loads(requests.get(f'{self.server}/api/concept-dbs/', headers=self.headers).text)['results']
+        vocabs = json.loads(requests.get(f'{self.server}/api/vocabs/', headers=self.headers).text)['results']
+        mct_cdbs = [MCTConceptDB(id=cdb['id'], name=cdb['name'], conceptdb_file=cdb['cdb_file']) for cdb in cdbs]
+        mct_vocabs = [MCTVocab(id=v['id'], name=v['name'], vocab_file=v['vocab_file']) for v in vocabs]
+        return mct_cdbs, mct_vocabs
+
+    def get_model_packs(self) -> List[MCTModelPack]:
+        """Get all MedCAT model packs in the MedCATTrainer instance.
+
+        Returns:
+            List[MCTModelPack]: A list of all MedCAT model packs in the MedCATTrainer instance
+        """
+        resp = json.loads(requests.get(f'{self.server}/api/modelpacks/', headers=self.headers).text)['results']
+        mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack']) for mp in resp]
+        return mct_model_packs
+
+    def get_meta_tasks(self) -> List[MCTMetaTask]:
+        """Get all MedCAT meta tasks that have been created in the MedCATTrainer instance.
+
+        Returns:
+            List[MCTMetaTask]: A list of all MedCAT meta tasks in the MedCATTrainer instance
+        """
+        resp = json.loads(requests.get(f'{self.server}/api/meta-tasks/', headers=self.headers).text)['results']
+        mct_meta_tasks = [MCTMetaTask(name=mt['name'], id=mt['id']) for mt in resp]
+        return mct_meta_tasks
+
+    def get_rel_tasks(self) -> List[MCTRelTask]:
+        """Get all MedCAT relation tasks that have been created in the MedCATTrainer instance.
+
+        Returns:
+            List[MCTRelTask]: A list of all MedCAT relation tasks in the MedCATTrainer instance
+        """
+        resp = json.loads(requests.get(f'{self.server}/api/relations/', headers=self.headers).text)['results']
+        mct_rel_tasks = [MCTRelTask(name=rt['label'], id=rt['id']) for rt in resp]
+        return mct_rel_tasks
+
+    def get_projects(self) -> List[MCTProject]:
+        """Get all MedCAT annotation projects that have been created in the MedCATTrainer instance.
+
+        Returns:
+            List[MCTProject]: All projects in the 'results' of the response; related objects carry only their ids
+        """
+        resp = json.loads(requests.get(f'{self.server}/api/project-annotate-entities/', headers=self.headers).text)['results']
+        mct_projects = [MCTProject(id=p['id'], name=p['name'], description=p['description'], cuis=p['cuis'],
+                                    dataset=MCTDataset(id=p['dataset']),
+                                    concept_db=MCTConceptDB(id=p['concept_db']),
+                                    vocab=MCTVocab(id=p['vocab']),
+                                    members=[MCTUser(id=u) for u in p['members']],
+                                    meta_tasks=[MCTMetaTask(id=mt) for mt in p['tasks']],
+                                    rel_tasks=[MCTRelTask(id=rt) for rt in p['relations']]) for p in resp]
+        return mct_projects
+
+    def get_datasets(self) -> List[MCTDataset]:
+        """Get all datasets that have been created in the MedCATTrainer instance.
+
+        Returns:
+            List[MCTDataset]: A list of all datasets in the MedCATTrainer instance
+        """
+        resp = json.loads(requests.get(f'{self.server}/api/datasets/', headers=self.headers).text)['results']
+        mct_datasets = [MCTDataset(name=d['name'], dataset_file=d['original_file'], id=d['id']) for d in resp]
+        return mct_datasets
+
+    def get_project_annos(self, projects: List[MCTProject]):
+        """Get the annotations for a list of projects. Schema is documented here: https://github.com/medcat/MedCATtrainer/blob/main/docs/api.md#download-annotations
+
+        Args:
+            projects (List[MCTProject]): A list of projects to get annotations for; every project must have its id set
+
+        Returns:
+            The parsed JSON body of the download-annos response (requested with with_text=1)
+        """
+        if any(p.id is None for p in projects):
+            raise MCTUtilsException('One or more project.id are None and all are required to download annotations')
+
+        resp = json.loads(requests.get(f'{self.server}/api/download-annos/?project_ids={",".join([str(p.id) for p in projects])}&with_text=1',
+                                       headers=self.headers).text)
+        return resp
+
+    def __str__(self) -> str:
+        return f'{self.server} \t {self.username} \t ********'  # never render the real password
+
+
+class MCTUtilsException(Exception):
+    """Base exception for MedCAT Trainer API errors"""
+    def __init__(self, message, original_exception=None):
+        self.message = message
+        self.original_exception = original_exception
+        super().__init__(self.message)
+
+    def __str__(self):
+        return f'{self.message} \n {self.original_exception}'
+
diff --git a/client/pyproject.toml b/client/pyproject.toml
new file mode 100644
index 00000000..6ac6a684
--- /dev/null
+++ b/client/pyproject.toml
@@ -0,0 +1,18 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "medcattrainer-client"
+version = "1.0.0"
+description = "Python client for interacting with a MedCATTrainer instance"
+readme = "README.md"  # resolved relative to this pyproject.toml, which already lives in client/
+requires-python = ">=3.10"
+license = { file = "LICENSE" }  # NOTE(review): also relative to client/ - confirm a LICENSE file exists there
+authors = [{ name = "Tom Searle", email = "tom@cogstack.org" }]
+dependencies = ["requests"]
+
+[project.urls]
+Homepage = "https://github.com/CogStack/MedCATtrainer/"
+Documentation = "https://medcattrainer.readthedocs.io/en/latest/"
+Source = "https://github.com/CogStack/MedCATtrainer/"
diff --git a/client/tests/test_mctclient.py b/client/tests/test_mctclient.py
new file mode 100644
index 00000000..c06b8ae6
--- /dev/null
+++ b/client/tests/test_mctclient.py
@@ -0,0 +1,119 @@
+import json
+import unittest
+from unittest.mock import patch, MagicMock
+from mctclient import (
+    MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject
+)
+
+class TestMCTClient(unittest.TestCase):
+    """Tests for MedCATTrainerSession against mocked `mctclient.requests` calls.
+
+    Responses are canned MagicMock objects built inside each test.
+    """
+
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_session_get_projects(self, mock_get, mock_post):
+        """get_projects() should wrap each raw id in the matching MCT* object."""
+        # Mock authentication
+        mock_post.return_value = MagicMock(status_code=200, text='{"token": "abc"}')
+        # Mock get_projects with a real project structure
+        mock_project = {
+            "id": 1,
+            "name": "Test Project",
+            "description": "A test project",
+            "cuis": "C001,C002",
+            "dataset": 10,
+            "concept_db": 20,
+            "vocab": 30,
+            "members": [100, 101],
+            "tasks": [200],
+            "relations": [300]
+        }
+        mock_get.return_value = MagicMock(
+            status_code=200,
+            text=json.dumps({"results": [mock_project]})
+        )
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        projects = session.get_projects()
+        self.assertIsInstance(projects, list)
+        self.assertEqual(len(projects), 1)
+        project = projects[0]
+        self.assertIsInstance(project, MCTProject)
+        self.assertEqual(project.name, "Test Project")
+        self.assertEqual(project.description, "A test project")
+        self.assertEqual(project.cuis, "C001,C002")
+        self.assertIsInstance(project.dataset, MCTDataset)
+        self.assertIsInstance(project.concept_db, MCTConceptDB)
+        self.assertIsInstance(project.vocab, MCTVocab)
+        self.assertTrue(all(isinstance(m, MCTUser) for m in project.members))
+        self.assertTrue(all(isinstance(mt, MCTMetaTask) for mt in project.meta_tasks))
+        self.assertTrue(all(isinstance(rt, MCTRelTask) for rt in project.rel_tasks))
+
+    @patch('mctclient.requests.post')
+    def test_create_project(self, mock_post):
+        """create_project() should return an MCTProject mirroring the objects passed in."""
+        # One canned project-creation payload, served through both `.text` and `.json()`,
+        # so the test does not depend on which of the two the client reads.
+        created_project = {
+            'id': '3',
+            'name': 'My Project',
+            'description': 'desc',
+            'cuis': 'C001,C002',
+            'dataset': '2',
+            'concept_db': '20',
+            'vocab': '30',
+            'members': ['1'],
+            'tasks': ['200'],
+            'relations': ['300']
+        }
+
+        def post_side_effect(url, *args, **kwargs):
+            # Route on endpoint: token for auth, the payload above for project creation.
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/project-annotate-entities/'):
+                return MagicMock(
+                    status_code=200,
+                    text=json.dumps(created_project),
+                    json=lambda: dict(created_project)
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        # Client-side objects whose ids match the canned payload
+        user = MCTUser(id='1', username='testuser')
+        dataset = MCTDataset(id='2', name='TestDS', dataset_file='file.csv')
+        concept_db = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        meta_task = MCTMetaTask(id='200', name='TestMetaTask')
+        rel_task = MCTRelTask(id='300', name='TestRelTask')
+
+        project = session.create_project(
+            name='My Project',
+            description='desc',
+            cuis='C001,C002',
+            members=[user],
+            dataset=dataset,
+            concept_db=concept_db,
+            vocab=vocab,
+            meta_tasks=[meta_task],
+            rel_tasks=[rel_task]
+        )
+        # The returned project should carry the same scalar fields and equal member/task objects
+        self.assertIsInstance(project, MCTProject)
+        self.assertEqual(project.name, 'My Project')
+        self.assertEqual(project.description, 'desc')
+        self.assertEqual(project.cuis, 'C001,C002')
+        self.assertIsInstance(project.dataset, MCTDataset)
+        self.assertIsInstance(project.concept_db, MCTConceptDB)
+        self.assertIsInstance(project.vocab, MCTVocab)
+        self.assertEqual(project.members, [user])
+        self.assertEqual(project.meta_tasks, [meta_task])
+        self.assertEqual(project.rel_tasks, [rel_task])
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file

From af516f823128ccae77eba36092cd393c9405d3c8 Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 22 Jul 2025 10:38:34 +0100
Subject: [PATCH 2/4] CU-8699wjhfu: add build dep

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1c737e66..b0a00cab 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install requests pytest
+          pip install requests pytest build
 
       - name: Install client package in development mode
         run: |

From d30278d17d08349bd6495cbd7ae4d2a457a5e014 Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 22 Jul 2025 14:33:24 +0100
Subject: [PATCH 3/4] CU-8699wjhfu: add client docs to docs site

---
 docs/client.md | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++
 docs/index.rst |  3 +-
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 docs/client.md

diff --git a/docs/client.md b/docs/client.md
new file mode 100644
index 00000000..d5d13132
--- /dev/null
+++ b/docs/client.md
@@ -0,0 +1,88 @@
+
+---
+
+# MedCATtrainer Client
+
+A Python client for interacting with a MedCATTrainer web application instance. This package allows you to manage datasets, concept databases, vocabularies, model packs, users, projects, and more via Python code or the command line.
+
+## Features
+
+- Manage datasets, concept databases, vocabularies, and model packs
+- Create and manage users and projects
+- Retrieve and upload project annotations
+- Command-line interface (CLI) for automation
+
+## Installation
+
+```sh
+pip install medcattrainer-client
+```
+
+Or, if installing from source:
+
+```sh
+cd client
+python -m build
+pip install dist/*.whl
+```
+
+## Python Usage
+
+```sh
+export MCTRAINER_USERNAME=<username>
+export MCTRAINER_PASSWORD=<password>
+```
+
+```python
+from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject
+
+# Connect to your MedCATTrainer instance
+session = MedCATTrainerSession(server="http://localhost:8001")
+
+# List all projects
+projects = session.get_projects()
+for project in projects:
+    print(project)
+
+# Create a new dataset
+dataset = session.create_dataset(name="My Dataset", dataset_file="path/to/data.csv")
+
+# Create a new user
+user = session.create_user(username="newuser", password="password123")
+
+# Create a new project
+project = session.create_project(
+    name="My Project",
+    description="A new annotation project",
+    members=[user],
+    dataset=dataset
+)
+```
+
+### MedCATTrainerSession Methods
+
+- `create_project(name, description, members, dataset, cuis=[], cuis_file=None, concept_db=None, vocab=None, cdb_search_filter=None, modelpack=None, meta_tasks=[], rel_tasks=[])`
+- `create_dataset(name, dataset_file)`
+- `create_user(username, password)`
+- `create_medcat_model(cdb, vocab)`
+- `create_medcat_model_pack(model_pack)`
+- `get_users()`
+- `get_models()`
+- `get_model_packs()`
+- `get_meta_tasks()`
+- `get_rel_tasks()`
+- `get_projects()`
+- `get_datasets()`
+- `get_project_annos(projects)`
+
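+Most methods return the corresponding object or a list of objects; `get_models()` returns a `(concept_dbs, vocabs)` pair and `get_project_annos()` returns the raw JSON export.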
+
+## License
+
+This project is licensed under the Apache 2.0 License.
+
+## Contributing
+
+Pull requests are welcome! For major changes, please open an issue first to discuss what you would like to change.
+
+
diff --git a/docs/index.rst b/docs/index.rst
index 46f3841e..b7758d22 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -19,7 +19,8 @@ Welcome to MedCATtrainer's documentation!
    annotator_guide.md
    meta_annotations.md
    advanced_usage.md
-   maintanence.md
+   maintenance.md
+   client.md
 
 
 Indices and tables

From 18df430c3c5a6c45519a5bfad202d77784d1699f Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 22 Jul 2025 14:53:34 +0100
Subject: [PATCH 4/4] CU-8699wjhfu: add example .ipyb example of client API

---
 notebook_docs/Client_API_Tutorials.ipynb | 485 +++++++++++++++++++++++
 1 file changed, 485 insertions(+)
 create mode 100644 notebook_docs/Client_API_Tutorials.ipynb

diff --git a/notebook_docs/Client_API_Tutorials.ipynb b/notebook_docs/Client_API_Tutorials.ipynb
new file mode 100644
index 00000000..3cecbdcb
--- /dev/null
+++ b/notebook_docs/Client_API_Tutorials.ipynb
@@ -0,0 +1,485 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Client API Tutorial\n",
+    "\n",
+    "This notebook demonstrates how to use the `MedCATTrainerSession` class to interact with the MedCATTrainer API. We'll cover:\n",
+    "\n",
+    "1. Setting up a MedCATTrainer session\n",
+    "2. Exploring available resources (users, datasets, models)\n",
+    "3. Creating new resources (datasets, models, users)\n",
+    "4. Creating annotation projects with different approaches\n",
+    "5. Downloading and saving annotations\n",
+    "\n",
+    "These steps provide a complete workflow for programmatically managing medical text annotation projects with MedCATTrainer."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<u>__SETUP:__</u>\n",
+    "\n",
+    "You need to have [MedCATtrainer service running locally](http://localhost:8001/)\n",
+    "\n",
+    "The default credentials after setup are:\n",
+    "\n",
+    "```bash\n",
+    "username: admin\n",
+    "password: admin\n",
+    "```\n",
+    "\n",
+    "The administrative console can be found here: http://localhost:8001/admin/\n",
+    "\n",
+    "Within this admin console you can manually interact with the MedCATtrainer program and set up projects\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Setup and Authentication\n",
+    "\n",
+    "First, let's import the necessary classes and set up our session:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import json\n",
+    "import sys\n",
+    "sys.path.append('../client')\n",
+    "from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTUser, MCTProject"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the session\n",
+    "\n",
+    "# Set environment variables for authentication. These are the defaults and setting them is optional.\n",
+    "os.environ['MCTRAINER_USERNAME'] = 'admin'\n",
+    "os.environ['MCTRAINER_PASSWORD'] = 'admin'\n",
+    "mct_server = 'http://localhost:8001' # Default server is http://localhost:8001 if not specified\n",
+    "# session = MedCATTrainerSession()\n",
+    "\n",
+    "# Initialize the session and change the explicit arguments if required.\n",
+    "session = MedCATTrainerSession(server=mct_server, username='admin', password='admin') # Wrapper for the MedCATTrainer API."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Explore Available Resources\n",
+    "\n",
+    "Let's check what resources are already available in the MedCATTrainer instance:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Users:\n",
+      "3 : annotator2\n",
+      "2 : annotator1\n",
+      "1 : admin\n",
+      "\n",
+      "Datasets:\n",
+      "1 : Example Dataset \t http://localhost:8001/media/Example_Dataset.csv\n",
+      "2 : Neurology Notes \t http://localhost:8001/media/neurology_notes.csv\n",
+      "3 : SG-example-docs \t http://localhost:8001/media/sg-sample-docs.csv\n",
+      "\n",
+      "Concept DBs:\n",
+      "1 : umls_cdb \t http://localhost:8001/media/cdb.dat\n",
+      "2 : snomed_cdb \t http://localhost:8001/media/snomed-cdb.dat\n",
+      "3 : snomed_2022_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_494c3717f637bb89/cdb.dat\n",
+      "8 : medcat_full_pack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_model_pack_u3fB9G5/cdb.dat\n",
+      "12 : snomed-2023-bert-metacats_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/cdb.dat\n",
+      "13 : de_id_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_deid_trained_a7120281ebb9fc9e/cdb.dat\n",
+      "\n",
+      "Vocabularies:\n",
+      "1 : http://localhost:8001/media/vocab.dat\n",
+      "3 : http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89/vocab.dat\n",
+      "12 : http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/vocab.dat\n",
+      "\n",
+      "ModelPacks:\n",
+      "1 : snomed_2022_modelpack \t http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89.zip\n",
+      "9 : snomed-2023-bert-metacats \t http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a.zip\n",
+      "10 : de-id modelpack \t http://localhost:8001/media/medcat_deid_trained_a7120281ebb9fc9e.zip\n",
+      "\n",
+      "Meta Tasks:\n",
+      "1 : Experiencer\n",
+      "2 : Presence\n",
+      "3 : Subject\n",
+      "4 : Temporality\n",
+      "5 : Time\n",
+      "\n",
+      "Relation Tasks:\n",
+      "1 : Spatial\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get users\n",
+    "users = session.get_users()\n",
+    "print(\"Users:\")\n",
+    "for user in users:\n",
+    "    print(user)\n",
+    "print()\n",
+    "\n",
+    "# Get datasets\n",
+    "datasets = session.get_datasets()\n",
+    "print(\"Datasets:\")\n",
+    "for dataset in datasets:\n",
+    "    print(dataset)\n",
+    "print()\n",
+    "\n",
+    "# Get concept databases and vocabularies\n",
+    "concept_dbs, vocabs = session.get_models()\n",
+    "print(\"Concept DBs:\")\n",
+    "for cdb in concept_dbs:\n",
+    "    print(cdb)\n",
+    "print()\n",
+    "print(\"Vocabularies:\")\n",
+    "for vocab in vocabs:\n",
+    "    print(vocab)\n",
+    "print()\n",
+    "\n",
+    "# Get modelpacks\n",
+    "model_packs = session.get_model_packs()\n",
+    "print(\"ModelPacks:\")\n",
+    "for model_pack in model_packs:\n",
+    "    print(model_pack)\n",
+    "print()\n",
+    "\n",
+    "# Get meta tasks\n",
+    "meta_tasks = session.get_meta_tasks()\n",
+    "print(\"Meta Tasks:\")\n",
+    "for i, task in enumerate(meta_tasks):\n",
+    "    print(f\"{i+1} : {task.name}\")\n",
+    "print()\n",
+    "\n",
+    "# Get relation tasks\n",
+    "rel_tasks = session.get_rel_tasks()\n",
+    "print(\"Relation Tasks:\")\n",
+    "for i, task in enumerate(rel_tasks):\n",
+    "    print(f\"{i+1} : {task.name}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Upload new resources to MedCATtrainer\n",
+    "\n",
+    "Before we create a project we need to create and upload all the required resources. We'll start with a dataset:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a new dataset to be annotated.\n",
+    "neurology_dataset = session.create_dataset(\n",
+    "    name=\"Neurology Notes\",  # Names must be unique\n",
+    "    dataset_file=\"./example_data/neuro.csv\"  # This CSV should have at least these 2 columns: [\"name\", \"text\"]\n",
+    ")\n",
+    "print(f\"Created dataset: {neurology_dataset}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.1 Creating MedCAT Models\n",
+    "\n",
+    "We have two options for creating models:\n",
+    "\n",
+    "1. Upload separate CDB and Vocab files\n",
+    "2. Upload a complete model pack ZIP\n",
+    "\n",
+    "Let's explore both approaches:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# If you don't have these MedCAT components or a model pack, you can download examples here:\n",
+    "# Download vocab.dat\n",
+    "!wget -O ./example_data/vocab.dat https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat\n",
+    "# Download snomed-cdb-mc-v1.cdb\n",
+    "!wget -O ./example_data/snomed-cdb-mc-v1.cdb https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/snomed-cdb-mc-v1.cdb\n",
+    "# Download model pack (this is a zip file)\n",
+    "!wget -O ./example_data/medcat_model_pack.zip https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/medcat_model_pack_c4e0d25701ce4e88.zip\n",
+    "\n",
+    "# Otherwise Skip this"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Option 1: Upload separate CDB and Vocab files\n",
+    "example_cdb = MCTConceptDB(name=\"example_cdbv1\", conceptdb_file=\"./example_data/snomed-cdb-mc-v1.cdb\")\n",
+    "example_vocab = MCTVocab(name=\"example_vocabv2\", vocab_file=\"./example_data/vocab.dat\")\n",
+    "\n",
+    "# Create the model in the MedCATTrainer instance\n",
+    "cdb, vocab = session.create_medcat_model(example_cdb, example_vocab)\n",
+    "print(f\"Created CDB: {cdb}\")\n",
+    "print(f\"Created Vocab: {vocab}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Option 2: Upload a complete modelpack ZIP\n",
+    "# This contains CDB, Vocab, and potentially MetaCAT and RelCAT models\n",
+    "medcat_model_pack = MCTModelPack(\n",
+    "    name=\"medcat_full_pack\",\n",
+    "    model_pack_zip=\"./example_data/medcat_model_pack.zip\"\n",
+    ")\n",
+    "session.create_medcat_model_pack(medcat_model_pack)\n",
+    "print(f\"Created model pack: {medcat_model_pack}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3.2 Creating a New User\n",
+    "\n",
+    "If we need to add an annotator to our project:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_user = session.create_user(username=\"annotator1\", password=\"secure_password\")\n",
+    "print(f\"Created user: {new_user}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Creating Annotation Projects\n",
+    "\n",
+    "Now we can create annotation projects using our resources:\n",
+    "\n",
+    "But first, let's check again what resources are now available in the MedCATTrainer instance after Part 3:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get users\n",
+    "users = session.get_users()\n",
+    "print(\"Users:\")\n",
+    "for user in users:\n",
+    "    print(user)\n",
+    "print()\n",
+    "\n",
+    "# Get datasets\n",
+    "datasets = session.get_datasets()\n",
+    "print(\"Datasets:\")\n",
+    "for dataset in datasets:\n",
+    "    print(dataset)\n",
+    "print()\n",
+    "\n",
+    "# Get concept databases and vocabularies\n",
+    "concept_dbs, vocabs = session.get_models()\n",
+    "print(\"Concept DBs:\")\n",
+    "for cdb in concept_dbs:\n",
+    "    print(cdb)\n",
+    "print()\n",
+    "print(\"Vocabularies:\")\n",
+    "for vocab in vocabs:\n",
+    "    print(vocab)\n",
+    "print()\n",
+    "\n",
+    "# Get modelpacks\n",
+    "model_packs = session.get_model_packs()\n",
+    "print(\"ModelPacks:\")\n",
+    "for model_pack in model_packs:\n",
+    "    print(model_pack)\n",
+    "print()\n",
+    "\n",
+    "# Get meta tasks\n",
+    "meta_tasks = session.get_meta_tasks()\n",
+    "print(\"Meta Tasks:\")\n",
+    "for i, task in enumerate(meta_tasks):\n",
+    "    print(f\"{i+1} : {task.name}\")\n",
+    "print()\n",
+    "\n",
+    "# Get relation tasks\n",
+    "rel_tasks = session.get_rel_tasks()\n",
+    "print(\"Relation Tasks:\")\n",
+    "for i, task in enumerate(rel_tasks):\n",
+    "    print(f\"{i+1} : {task.name}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Method 1: Create a project with separate CDB and Vocab\n",
+    "neuro_project = session.create_project(\n",
+    "    name=\"Neurology Annotation Project\",\n",
+    "    description=\"Demo annotation project of neurology conditions, epilepsy & seizure\",\n",
+    "    members=[user for user in users],  # Add all users...\n",
+    "    dataset=datasets[-1],\n",
+    "    concept_db=concept_dbs[-1],\n",
+    "    vocab=vocabs[-1],\n",
+    "    cuis=[\"84757009\", \"91175000\"],  # Whitelist Filter CUIs/concepts\n",
+    "    #meta_tasks=[\"Temporality\", \"Certainty\"],  # Can specify by name or by object\n",
+    "    #rel_tasks=[\"Has_Finding\"] # only add this relational extraction task if absolutely required\n",
+    ")\n",
+    "\n",
+    "print(f\"Created project: {neuro_project}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Method 2: Create a project with a modelpack\n",
+    "\n",
+    "# Rerun the resource-listing cell above first so that `users`, `datasets` and `model_packs` are up to date:\n",
+    "general_project = session.create_project(\n",
+    "    name=\"Demo General Medical Annotation\",\n",
+    "    description=\"Annotation of neurology medical conditions\",\n",
+    "    members=[user for user in users],  # All users\n",
+    "    dataset=datasets[-1],  # Use existing dataset\n",
+    "    modelpack=model_packs[-1],  # Use existing model pack\n",
+    "    # cuis_file=\"./resources/mct_filter.json\",  # Load whitelist concepts from a file [\"concept1\", \"concept2\"]\n",
+    ")\n",
+    "\n",
+    "print(f\"Created project with model pack: {general_project}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Retrieving Project Annotations\n",
+    "\n",
+    "After annotators have worked on the projects, we can download the annotations:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get all projects\n",
+    "mct_projects = session.get_projects()\n",
+    "\n",
+    "# Download annotations for all projects\n",
+    "projects = session.get_project_annos(mct_projects)\n",
+    "\n",
+    "print(f\"Downloaded annotations for {len(mct_projects)} projects:\")\n",
+    "for p in projects['projects']:\n",
+    "    print(p['name'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inspect all details from a single export\n",
+    "projects['projects'][0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Saving Annotations for Analysis\n",
+    "\n",
+    "Finally, let's save the annotations to a file for later analysis:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Save MCT export / annotations to a file\n",
+    "with open(\"./example_data/medical_annotations.json\", \"w\") as f:\n",
+    "    json.dump(projects, f, indent=2)\n",
+    "\n",
+    "print(\"Annotations saved to ./example_data/medical_annotations.json\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# End of Tutorial"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "bioext-medcat-env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}