-
Notifications
You must be signed in to change notification settings - Fork 950
Add draft of FASTQ_REMOVE_ADAPTERS_AND_MERGE subworkflow with tests #9521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
f2ee256
060b52d
a03e263
e505244
26d5803
dbadf9c
e97aaf4
51369d0
7130774
e188efd
74834d8
f69fa14
2629da3
19b67c4
819155d
53675cf
b8c8f85
6c99c79
28bea76
dabd880
2d52823
60239ed
7b3c6f4
6963d50
76c51e7
36c4abb
9239334
ced4270
66b909c
a68276a
c7ef10c
ca4db62
54863da
697d943
a0b1a41
77cdc8c
209581d
5854017
3001aa7
beba663
8637cb1
a7bc445
68d6485
690f067
2b65b43
08f0cec
8293491
a78cc00
56445cb
66d0bdd
f3f9582
0bac4f6
a939428
9d6f628
7a6165f
24b5c0c
f13b9a9
e2c9156
b686830
196f22c
5649f8a
a0e5a8b
6b2c2fb
ea2d4f0
41ac464
37dbe45
03cae33
1f66a1e
e23f119
2c74f42
1e8cb18
51ffcb6
02ae283
4999717
a0a1406
54a78b6
460be27
0b1640f
f12328e
2ed958f
6546c99
ad92ee7
2f5aefa
3ca7400
efbaae6
f5cb4ca
e73c143
7184754
36a9fff
d7519d6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| // both SE and PE | ||
| include { TRIMMOMATIC } from '../../../modules/nf-core/trimmomatic/main' | ||
| include { CUTADAPT } from '../../../modules/nf-core/cutadapt/main' | ||
| include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' | ||
| include { BBMAP_BBDUK } from '../../../modules/nf-core/bbmap/bbduk/main' | ||
| include { LEEHOM } from '../../../modules/nf-core/leehom/main' | ||
| // both SE and PE, plus merging | ||
| include { FASTP } from '../../../modules/nf-core/fastp/main' | ||
| include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../../modules/nf-core/adapterremoval/main' | ||
| include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../../modules/nf-core/adapterremoval/main' | ||
|
|
||
| workflow FASTQ_REMOVEADAPTERS_MERGE { | ||
|
|
||
| take: | ||
| ch_input_reads // channel: [mandatory] meta, reads | ||
| val_adapter_tool // string: [mandatory] tool_name // choose from: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"] | ||
| ch_adapters // channel: [optional] {fasta,txt} // fasta, for bbduk or fastp, or txt, for adapterremoval | ||
| val_save_merged // boolean: [mandatory] if true, will return the merged reads instead, for fastp and adapterremoval | ||
| val_fastp_discard_trimmed_pass // boolean: [mandatory] // only for fastp | ||
| val_fastp_save_trimmed_fail // boolean: [mandatory] // only for fastp | ||
|
|
||
| main: | ||
|
|
||
| ch_discarded_reads = channel.empty() // from trimmomatic, trimgalore, leehom, fastp, adapterremoval | ||
| ch_paired_interleaved = channel.empty() // from adapterremoval | ||
| ch_log = channel.empty() // from trimmomatic, trimgalore, fastp | ||
| ch_report = channel.empty() // from trimmomatic, trimgalore, fastp | ||
| ch_versions = channel.empty() | ||
| ch_multiqc_files = channel.empty() // from trimmomatic, cutadapt, bbduk, leehom, fastp, adapterremoval | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would add a validation check here to make sure whatever is given to |
||
| if (val_adapter_tool == "trimmomatic") { | ||
| TRIMMOMATIC( ch_input_reads ) | ||
|
|
||
| ch_processed_reads = TRIMMOMATIC.out.trimmed_reads | ||
| ch_discarded_reads = ch_discarded_reads.mix(TRIMMOMATIC.out.unpaired_reads.transpose()) // .transpose() because paired reads have 2 unpaired files in an array | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you mean by 2 unpaired files in an array? Do they represent different 'discarded' files, or the discarded reads from each of R1/R2? In the latter case, does it make sense to separate them? |
||
| ch_log = TRIMMOMATIC.out.trim_log | ||
| ch_report = TRIMMOMATIC.out.summary | ||
| ch_versions = ch_versions.mix(TRIMMOMATIC.out.versions.first()) | ||
| ch_multiqc_files = ch_multiqc_files.mix(TRIMMOMATIC.out.out_log) | ||
| } else if (val_adapter_tool == "cutadapt") { | ||
| CUTADAPT( ch_input_reads ) | ||
|
|
||
| ch_processed_reads = CUTADAPT.out.reads | ||
| ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log) | ||
| } else if (val_adapter_tool == "trimgalore") { | ||
| TRIMGALORE( ch_input_reads ) | ||
|
|
||
| ch_processed_reads = TRIMGALORE.out.reads | ||
| ch_discarded_reads = ch_discarded_reads.mix(TRIMGALORE.out.unpaired) | ||
| ch_log = TRIMGALORE.out.log | ||
| ch_report = TRIMGALORE.out.html.mix(TRIMGALORE.out.zip) | ||
| } else if (val_adapter_tool == "bbduk") { | ||
| BBMAP_BBDUK( ch_input_reads, ch_adapters ) | ||
|
|
||
| ch_processed_reads = BBMAP_BBDUK.out.reads | ||
| ch_versions = ch_versions.mix(BBMAP_BBDUK.out.versions.first()) | ||
| ch_multiqc_files = ch_multiqc_files.mix(BBMAP_BBDUK.out.log) | ||
| } else if (val_adapter_tool == "leehom") { | ||
| LEEHOM( ch_input_reads ) | ||
|
|
||
| ch_processed_reads = LEEHOM.out.fq_pass | ||
| .join(LEEHOM.out.unmerged_r1_fq_pass, by: 0, remainder: true) | ||
| .join(LEEHOM.out.unmerged_r2_fq_pass, by: 0, remainder: true) | ||
| .map { meta, single, r1, r2 -> | ||
| if (meta.single_end) { | ||
| return [meta, single] | ||
| } else { | ||
| return [meta, [r1, r2]] | ||
| } | ||
| } | ||
| ch_discarded_reads = ch_discarded_reads.mix(LEEHOM.out.fq_fail, LEEHOM.out.unmerged_r1_fq_fail, LEEHOM.out.unmerged_r2_fq_fail) | ||
| ch_versions = ch_versions.mix(LEEHOM.out.versions.first()) | ||
| ch_multiqc_files = ch_multiqc_files.mix(LEEHOM.out.log) | ||
| } else if (val_adapter_tool == "fastp") { | ||
| FASTP( | ||
| ch_input_reads.map { meta, files -> [ meta, files, ch_adapters ] }, | ||
| val_fastp_discard_trimmed_pass, | ||
| val_fastp_save_trimmed_fail, | ||
| val_save_merged | ||
| ) | ||
|
|
||
| if (val_save_merged) { | ||
| ch_processed_reads = FASTP.out.reads_merged | ||
| } else { | ||
| ch_processed_reads = FASTP.out.reads | ||
| } | ||
| ch_discarded_reads = ch_discarded_reads.mix(FASTP.out.reads_fail.transpose()) // .transpose() because paired reads have 3 fail files in an array | ||
| ch_log = FASTP.out.log | ||
| ch_report = FASTP.out.html | ||
| ch_versions = ch_versions.mix(FASTP.out.versions.first()) | ||
| ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json) | ||
| } else if (val_adapter_tool == "adapterremoval") { | ||
| ch_adapterremoval_in = ch_input_reads | ||
| .branch { meta, _reads -> | ||
| single: meta.single_end | ||
| paired: !meta.single_end | ||
| } | ||
|
|
||
| ADAPTERREMOVAL_SE( ch_adapterremoval_in.single, ch_adapters ) | ||
| ADAPTERREMOVAL_PE( ch_adapterremoval_in.paired, ch_adapters ) | ||
|
|
||
| if (val_save_merged) { | ||
| ch_processed_reads = ADAPTERREMOVAL_SE.out.collapsed | ||
| .mix( | ||
| ADAPTERREMOVAL_PE.out.collapsed, | ||
| ADAPTERREMOVAL_SE.out.collapsed_truncated, | ||
| ADAPTERREMOVAL_PE.out.collapsed_truncated | ||
| ) | ||
| } else { | ||
| ch_processed_reads = ADAPTERREMOVAL_SE.out.singles_truncated.mix(ADAPTERREMOVAL_PE.out.paired_truncated) | ||
| } | ||
|
Comment on lines
+103
to
+111
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please check against Preparing the AdapterRemoval reads is a little more complicated than just mixing, and you appear to have a mistake (there is no What I would recommend is to offer as 'single' output:
|
||
| ch_discarded_reads = ch_discarded_reads.mix(ADAPTERREMOVAL_SE.out.discarded, ADAPTERREMOVAL_PE.out.discarded) | ||
| ch_paired_interleaved = ADAPTERREMOVAL_SE.out.paired_interleaved.mix(ADAPTERREMOVAL_PE.out.paired_interleaved) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be honest, I think it's very rare that anyone uses these... I would maybe drop support for them to simplify. |
||
| ch_versions = ch_versions.mix(ADAPTERREMOVAL_SE.out.versions.first(), ADAPTERREMOVAL_PE.out.versions.first()) | ||
| ch_multiqc_files = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings, ADAPTERREMOVAL_SE.out.settings) | ||
| } else { | ||
| error('Please choose one of the available adapter removal and merging tools: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"]') | ||
| } | ||
|
|
||
| emit: | ||
| processed_reads = ch_processed_reads // channel: [ val(meta), [ fastq.gz ] ] | ||
| discarded_reads = ch_discarded_reads // channel: [ val(meta), [ fastq.gz ] ] | ||
| paired_interleaved = ch_paired_interleaved // channel: [ val(meta), [ fastq.gz ] ] | ||
| logfile = ch_log // channel: [ val(meta), [ {log,txt} ] ] | ||
| report = ch_report // channel: [ val(meta), [ {summary,html,zip} ] ] | ||
| versions = ch_versions // channel: [ versions.yml ] | ||
| multiqc_files = ch_multiqc_files | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json | ||
| name: "fastq_removeadapters_merge" | ||
| description: Remove adapters and merge reads based on various module choices | ||
| keywords: | ||
| - adapters | ||
| - removal | ||
| - short reads | ||
| - merge | ||
| - trim | ||
| components: | ||
| - trimmomatic | ||
| - cutadapt | ||
| - trimgalore | ||
| - bbmap/bbduk | ||
| - leehom | ||
| - fastp | ||
| - adapterremoval | ||
| input: | ||
| - ch_input_reads: | ||
| type: file | ||
| description: | | ||
| List of FastQ files of size 1 and 2 for single-end and paired-end data, respectively. | ||
| Structure: [ val(meta), [ path(reads) ] ] | ||
| - val_adapter_tool: | ||
| type: string | ||
| description: | | ||
| Choose one of the available adapter removal and/or merging tools | ||
| enum: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"] | ||
| - ch_adapters: | ||
| type: file | ||
| description: | | ||
| Optional reference files, containing adapter and/or contaminant sequences for removal. | ||
| In fasta format for bbmap/bbduk and fastp, or in text format for AdapterRemoval (one adapter per line). | ||
| - val_save_merged: | ||
| type: boolean | ||
| description: | | ||
| Specify true to output merged reads instead | ||
| Used by fastp and adapterremoval | ||
| - val_fastp_discard_trimmed_pass: | ||
| type: boolean | ||
| description: | | ||
| Used only by fastp. | ||
| Specify true to not write any reads that pass trimming thresholds from the fastp process. | ||
| This can be used to use fastp for the output report only. | ||
| - val_fastp_save_trimmed_fail: | ||
| type: boolean | ||
| description: | | ||
| Used only by fastp. | ||
| Specify true to save files that failed to pass fastp trimming thresholds | ||
| output: | ||
| - processed_reads: | ||
| type: file | ||
| description: | | ||
| Structure: [ val(meta), path(fastq.gz) ] | ||
| The trimmed/modified single or paired end or merged fastq reads | ||
| pattern: "*.fastq.gz" | ||
| - discarded_reads: | ||
| type: file | ||
| description: | | ||
| Structure: [ val(meta), path(fastq.gz) ] | ||
| The discarded reads | ||
| pattern: "*.fastq.gz" | ||
| - paired_interleaved: | ||
| type: file | ||
| description: | | ||
| Structure: [ val(meta), path(fastq.gz) ] | ||
| Adapterremoval paired-end reads in a single file, interleaving mate 1 and mate 2 reads | ||
| pattern: "*.paired.fastq.gz" | ||
| - logfile: | ||
| type: file | ||
| description: | | ||
| Execution log file | ||
| (trimmomatic {log}, trimgalore {txt}, fastp {log}) | ||
| pattern: "*.{log,txt}" | ||
| - report: | ||
| type: file | ||
| description: | | ||
| Execution report | ||
| (trimmomatic {summary}, trimgalore {html,zip}, fastp {html}) | ||
| pattern: "*.{summary,html,zip}" | ||
| - versions: | ||
| type: file | ||
| description: | | ||
| File containing software versions | ||
| Structure: [ path(versions.yml) ] | ||
| pattern: "versions.yml" | ||
| - multiqc_files: | ||
| type: file | ||
| description: | | ||
| MultiQC-compatible output files from tools used in preprocessing | ||
| (trimmomatic, cutadapt, bbduk, leehom, fastp, adapterremoval) | ||
| authors: | ||
| - "@kornkv" | ||
| - "@vagkaratzas" | ||
| maintainers: | ||
| - "@kornkv" | ||
| - "@vagkaratzas" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For clarity for a pipeline dev, maybe
ch_adapters_file or a ch_custom_adapters_file?