Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/check-pr-label.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: Check Label on PR
on:
pull_request:
types: [opened, synchronize, labeled, unlabeled]

jobs:
check_pr_label:
runs-on: ubuntu-latest
steps:
- name: Checkout PR
uses: actions/checkout@v6
with:
fetch-depth: 0 # Fetch all history for all branches and tags

- name: Check if the PR contains the label validation or no validation
id: check_pr_label
if: |
! contains( github.event.pull_request.labels.*.name, 'validation') && ! contains( github.event.pull_request.labels.*.name, 'no validation')
run: |
echo "Neither 'validation' nor 'no validation' labels are present."
exit 1 # Exit with a failure
31 changes: 31 additions & 0 deletions .github/workflows/check-version.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Check version update
on:
pull_request:
branches: [ "master" ]
permissions:
contents: read
jobs:
check-version:
runs-on: ubuntu-latest
steps:
- name: Checkout PR
uses: actions/checkout@v6
with:
fetch-depth: 0 # Fetch all history for all branches and tags
- name: Check version update
run: |
PR_NUMBER=${{ github.event.pull_request.number }}
FILE_CHANGED=$(git diff --name-only ${{ github.event.pull_request.base.sha }} HEAD | grep 'pyproject.toml' || true)
if [ -z "$FILE_CHANGED" ]; then
echo "pyproject.toml was not changed in this PR. Please update the version."
exit 1
else
echo "pyproject.toml was changed in this PR."
fi
VERSION_CHANGED=$(git diff ${{ github.event.pull_request.base.sha }} HEAD | grep 'version =' || true)
if [ -z "$VERSION_CHANGED" ]; then
echo "Version in pyproject.toml was not updated. Please update the version."
exit 1
else
echo "Version in pyproject.toml was updated."
fi
58 changes: 58 additions & 0 deletions .github/workflows/lint-code.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: Lint code
on: [push, pull_request]

jobs:
# Use ruff to check for code style violations
ruff-check:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff
- name: ruff --> Check for style violations
# Configured in pyproject.toml
run: ruff check .

# Use ruff to check code formatting
ruff-format:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff
- name: ruff --> Check code formatting
run: ruff format --check .

# Use pip-check-reqs/pip-missing-reqs to check for missing dependencies
requirements-check:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.14"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pip-check-reqs

- name: Run pip-check-reqs/pip-missing-reqs
run: |
pip-missing-reqs .
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,22 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Set up Python 3.14
uses: actions/setup-python@v3
uses: actions/setup-python@v6
with:
python-version: "3.14"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install pytest pytest-cov
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Install dataflow_transfer
run: pip install -e .
- name: Test with pytest
run: |
pytest
pytest --cov --cov-branch --cov-report=xml
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
9 changes: 5 additions & 4 deletions dataflow_transfer/cli.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import click
import os
import logging
import os

import click
import yaml

from dataflow_transfer.dataflow_transfer import transfer_runs
from dataflow_transfer import log
from dataflow_transfer.dataflow_transfer import transfer_runs
from dataflow_transfer.run_classes.registry import RUN_CLASS_REGISTRY

logger = logging.getLogger(__name__)


def load_config(config_file_path):
with open(config_file_path, "r") as file:
with open(config_file_path) as file:
config = yaml.safe_load(file)
return config

Expand Down
6 changes: 3 additions & 3 deletions dataflow_transfer/dataflow_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import time

from dataflow_transfer.run_classes.registry import RUN_CLASS_REGISTRY
from dataflow_transfer.utils.filesystem import get_run_dir, find_runs
from dataflow_transfer.utils.filesystem import find_runs, get_run_dir

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -30,7 +30,7 @@ def process_run(run_dir, sequencer, config):
## Sequencing ongoing. Start background transfer if not already running.
if run.sequencing_ongoing:
run.update_statusdb(status="sequencing_started")
run.initiate_background_transfer()
run.start_transfer(final=False)
return

## Sequencing finished but transfer not complete. Start final transfer.
Expand All @@ -41,7 +41,7 @@ def process_run(run_dir, sequencer, config):
"Will attempt final transfer again."
)
run.update_statusdb(status="sequencing_finished")
run.do_final_transfer()
run.start_transfer(final=True)
return

## Final transfer completed successfully. Update statusdb.
Expand Down
15 changes: 7 additions & 8 deletions dataflow_transfer/run_classes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# This adds the run classes to the registry. Do not remove.

from .registry import RUN_CLASS_REGISTRY


from dataflow_transfer.run_classes.element_runs import AVITIRun # noqa: F401, I001
from dataflow_transfer.run_classes.illumina_runs import (
NovaSeqXPlusRun,
NextSeqRun,
MiSeqRun,
MiSeqRun, # noqa: F401
NextSeqRun, # noqa: F401
NovaSeqXPlusRun, # noqa: F401
)
from dataflow_transfer.run_classes.ont_runs import PromethIONRun, MinIONRun
from dataflow_transfer.run_classes.element_runs import AVITIRun
from dataflow_transfer.run_classes.ont_runs import MinIONRun, PromethIONRun # noqa: F401

from .registry import RUN_CLASS_REGISTRY # noqa: F401
1 change: 1 addition & 0 deletions dataflow_transfer/run_classes/element_runs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataflow_transfer.run_classes.generic_runs import Run

from .registry import register_run_class


Expand Down
49 changes: 15 additions & 34 deletions dataflow_transfer/run_classes/generic_runs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import logging
import os
import re
from datetime import datetime
from dataflow_transfer.utils.statusdb import StatusdbSession

import dataflow_transfer.utils.filesystem as fs
from dataflow_transfer.utils.statusdb import StatusdbSession

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -65,53 +66,33 @@ def generate_rsync_command(self, is_final_sync=False):
command_str += f"; echo $? > {self.final_rsync_exitcode_file}"
return command_str

def initiate_background_transfer(self):
def start_transfer(self, final=False):
"""Start background rsync transfer to storage."""
background_transfer_command = self.generate_rsync_command(is_final_sync=False)
transfer_command = self.generate_rsync_command(is_final_sync=final)
if fs.rsync_is_running(src=self.run_dir):
logger.info(
f"Rsync is already running for {self.run_dir}. Skipping background transfer initiation."
)
return
try:
fs.submit_background_process(background_transfer_command)
fs.submit_background_process(transfer_command)
logger.info(
f"{self.run_id}: Started background rsync to {self.miarka_destination}"
+ f" with the following command: '{background_transfer_command}'"
f"{self.run_id}: Started rsync to {self.miarka_destination}"
+ f" with the following command: '{transfer_command}'"
)
except Exception as e:
logger.error(f"Failed to start background transfer for {self.run_id}: {e}")
logger.error(f"Failed to start rsync for {self.run_id}: {e}")
raise e
rsync_info = {
"command": background_transfer_command,
"command": transfer_command,
"destination_path": self.miarka_destination,
}
self.update_statusdb(status="transfer_started", additional_info=rsync_info)

def do_final_transfer(self):
"""Start final rsync transfer to storage."""
final_transfer_command = self.generate_rsync_command(is_final_sync=True)
if fs.rsync_is_running(src=self.run_dir):
logger.info(
f"Rsync is already running for {self.run_dir}. Skipping final transfer initiation."
)
return
try:
fs.submit_background_process(final_transfer_command)
logger.info(
f"{self.run_id}: Started FINAL rsync to {self.miarka_destination}"
+ f" with the following command: '{final_transfer_command}'"
if final:
self.update_statusdb(
status="final_transfer_started", additional_info=rsync_info
)
except Exception as e:
logger.error(f"Failed to start final transfer for {self.run_id}: {e}")
raise e
rsync_info = {
"command": final_transfer_command,
"destination_path": self.miarka_destination,
}
self.update_statusdb(
status="final_transfer_started", additional_info=rsync_info
)
else:
self.update_statusdb(status="transfer_started", additional_info=rsync_info)

@property
def final_sync_successful(self):
Expand Down
1 change: 1 addition & 0 deletions dataflow_transfer/run_classes/illumina_runs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataflow_transfer.run_classes.generic_runs import Run

from .registry import register_run_class


Expand Down
1 change: 1 addition & 0 deletions dataflow_transfer/run_classes/ont_runs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataflow_transfer.run_classes.generic_runs import Run

from .registry import register_run_class


Expand Down
13 changes: 7 additions & 6 deletions dataflow_transfer/tests/test_filesystem.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import json
import os
import tempfile
import pytest
from unittest.mock import patch
from subprocess import CalledProcessError
from unittest.mock import patch

import pytest

from dataflow_transfer.utils.filesystem import (
get_run_dir,
check_exit_status,
find_runs,
get_run_dir,
locate_metadata,
parse_metadata_files,
rsync_is_running,
submit_background_process,
parse_metadata_files,
check_exit_status,
locate_metadata,
)


Expand Down
Loading