nf-core · bentsherman · May 15, 2025
diff --git a/main.nf b/main.nf
@@ -9,43 +9,39 @@
 ----------------------------------------------------------------------------------------
 */
 
-nextflow.preview.output = true
+nextflow.preview.types = true
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
+    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS / TYPES
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
 include { SRA                     } from './workflows/sra'
 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
 include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
-include { softwareVersionsToYAML  } from './subworkflows/nf-core/utils_nfcore_pipeline'
+include { SOFTWARE_VERSIONS       } from './subworkflows/nf-core/utils_nfcore_pipeline'
+include { Sample                  } from './workflows/sra'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    NAMED WORKFLOWS FOR PIPELINE
+    WORKFLOW INPUTS
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-//
-// WORKFLOW: Run main nf-core/fetchngs analysis pipeline depending on type of identifier provided
-//
-workflow NFCORE_FETCHNGS {
+params {
 
-    take:
-    ids // channel: database ids read in from --input
+    // Path to a file of SRA/ENA/GEO/DDBJ identifiers whose associated metadata and FastQ files will be downloaded.
+    input: Path
 
-    main:
+    // Comma-separated list of ENA metadata fields to fetch before downloading data (defaults to an empty string).
+    ena_metadata_fields: String = ''
 
-    //
-    // WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids
-    //
-    SRA ( ids )
+    // When true, only download the metadata for the given database ids and skip the FastQ download (default: false).
+    skip_fastq_download: Boolean = false
 
-    emit:
-    samples = SRA.out.samples
-    metadata = SRA.out.metadata
+    // Path to the dbGaP repository key (optional).
+    dbgap_key: Path?
 }
 
 /*
@@ -60,42 +56,59 @@ workflow {
     //
     // SUBWORKFLOW: Run initialisation tasks
     //
-    PIPELINE_INITIALISATION (
+    ids = PIPELINE_INITIALISATION (
         params.version,
         params.validate_params,
         params.monochrome_logs,
         args,
-        params.outdir,
+        workflow.outputDir,
         params.input,
         params.ena_metadata_fields
     )
 
     //
     // WORKFLOW: Run primary workflows for the pipeline
     //
-    NFCORE_FETCHNGS (
-        PIPELINE_INITIALISATION.out.ids
+    sra = SRA (
+        Channel.fromList(ids),
+        [
+            ena_metadata_fields: params.ena_metadata_fields,
+            skip_fastq_download: params.skip_fastq_download,
+            dbgap_key: params.dbgap_key
+        ]
     )
+
+    //
+    // SUBWORKFLOW: Collect software versions
+    //
+    versions = SOFTWARE_VERSIONS()
+
     //
     // SUBWORKFLOW: Run completion tasks
     //
     PIPELINE_COMPLETION (
         params.email,
         params.email_on_fail,
         params.plaintext_email,
-        params.outdir,
+        workflow.outputDir,
         params.monochrome_logs,
         params.hook_url
     )
 
     publish:
-    samples = NFCORE_FETCHNGS.out.samples
-    metadata = NFCORE_FETCHNGS.out.metadata
-    versions = softwareVersionsToYAML()
+    samples = sra.samples
+    metadata = sra.metadata
+    versions = versions
 }
 
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    WORKFLOW OUTPUTS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
 output {
-    samples {
+    samples: Channel<Sample> {
         path { sample ->
             sample.fastq_1 >> 'fastq/'
             sample.fastq_2 >> 'fastq/'
@@ -107,12 +120,11 @@ output {
         }
     }
 
-    metadata {
+    metadata: Channel<Path> {
         path 'metadata'
     }
 
-    versions {
-        path '.'
+    versions: Map<String,Map<String,String>> {
         index {
             path 'nf_core_fetchngs_software_mqc_versions.yml'
         }

diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf
@@ -1,5 +1,5 @@
 process ASPERA_CLI {
-    tag "$meta.id"
+    tag "$id"
     label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
@@ -8,29 +8,39 @@ process ASPERA_CLI {
         'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }"
 
     input:
-    tuple val(meta), val(fastq)
-    val user
+    id              : String    // sample identifier; used as the prefix of every file written by the script
+    single_end      : Boolean   // true: one FastQ is saved as <id>.fastq.gz; false: two are saved as <id>_1/_2.fastq.gz
+    fastq_aspera    : String    // ';'-separated Aspera sources; split with tokenize(';') in the script
+    md5_1           : String    // expected MD5 of the first FastQ, verified with `md5sum -c`
+    md5_2           : String?   // expected MD5 of the second FastQ; only used in the paired-end branch
+    user            : String    // prepended to each source as <user>@<source> on the ascp command line
 
     output:
-    tuple val(meta), path("*fastq.gz"), emit: fastq
-    tuple val(meta), path("*md5")     , emit: md5
-    tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions
+    id      : String = id
+    fastq_1 : Path  = file(single_end ? '*.fastq.gz' : '*_1.fastq.gz')          // single-end output carries no _1 suffix
+    fastq_2 : Path? = file('*_2.fastq.gz')
+    md5_1   : Path  = file(single_end ? '*.fastq.gz.md5' : '*_1.fastq.gz.md5')  // checksum file follows the same naming
+    md5_2   : Path? = file('*_2.fastq.gz.md5')
+
+    topic:
+    [process: task.process, name: 'aspera_cli', version: eval('ascli --version')] >> 'versions'
 
     script:
     def args = task.ext.args ?: ''
     def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
-    if (meta.single_end) {
+    def fastq = fastq_aspera.tokenize(';')
+    if (single_end) {
         """
         $conda_prefix
 
         ascp \\
             $args \\
             -i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
             ${user}@${fastq[0]} \\
-            ${meta.id}.fastq.gz
+            ${id}.fastq.gz
 
-        echo "${meta.md5_1}  ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
-        md5sum -c ${meta.id}.fastq.gz.md5
+        echo "${md5_1}  ${id}.fastq.gz" > ${id}.fastq.gz.md5
+        md5sum -c ${id}.fastq.gz.md5
         """
     } else {
         """
@@ -40,19 +50,19 @@ process ASPERA_CLI {
             $args \\
             -i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
             ${user}@${fastq[0]} \\
-            ${meta.id}_1.fastq.gz
+            ${id}_1.fastq.gz
 
-        echo "${meta.md5_1}  ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
-        md5sum -c ${meta.id}_1.fastq.gz.md5
+        echo "${md5_1}  ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
+        md5sum -c ${id}_1.fastq.gz.md5
 
         ascp \\
             $args \\
             -i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
             ${user}@${fastq[1]} \\
-            ${meta.id}_2.fastq.gz
+            ${id}_2.fastq.gz
 
-        echo "${meta.md5_2}  ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
-        md5sum -c ${meta.id}_2.fastq.gz.md5
+        echo "${md5_2}  ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
+        md5sum -c ${id}_2.fastq.gz.md5
         """
     }
 }
diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf
@@ -1,6 +1,6 @@
 
 process SRA_FASTQ_FTP {
-    tag "$meta.id"
+    tag "$id"
     label 'process_low'
     label 'error_retry'
 
@@ -10,42 +10,52 @@ process SRA_FASTQ_FTP {
         'biocontainers/wget:1.21.4' }"
 
     input:
-    tuple val(meta), val(fastq)
+    id              : String    // sample identifier; used as the prefix of every file written by the script
+    single_end      : Boolean   // true: one FastQ is saved as <id>.fastq.gz; false: two are saved as <id>_1/_2.fastq.gz
+    fastq_1         : String    // source of the first FastQ, passed to wget
+    fastq_2         : String?   // source of the second FastQ; only used in the paired-end branch
+    md5_1           : String    // expected MD5 of the first FastQ, verified with `md5sum -c`
+    md5_2           : String?   // expected MD5 of the second FastQ; only used in the paired-end branch
 
     output:
-    tuple val(meta), path("*fastq.gz"), emit: fastq
-    tuple val(meta), path("*md5")     , emit: md5
-    tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions
+    id      : String = id
+    fastq_1 : Path  = file(single_end ? '*.fastq.gz' : '*_1.fastq.gz')          // single-end output carries no _1 suffix
+    fastq_2 : Path? = file('*_2.fastq.gz')
+    md5_1   : Path  = file(single_end ? '*.fastq.gz.md5' : '*_1.fastq.gz.md5')  // checksum file follows the same naming
+    md5_2   : Path? = file('*_2.fastq.gz.md5')
+
+    topic:
+    [process: task.process, name: 'wget', version: eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')")] >> 'versions'
 
     script:
     def args = task.ext.args ?: ''
-    if (meta.single_end) {
+    if (single_end) {
         """
         wget \\
             $args \\
-            -O ${meta.id}.fastq.gz \\
-            ${fastq[0]}
+            -O ${id}.fastq.gz \\
+            ${fastq_1}
 
-        echo "${meta.md5_1}  ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
-        md5sum -c ${meta.id}.fastq.gz.md5
+        echo "${md5_1}  ${id}.fastq.gz" > ${id}.fastq.gz.md5
+        md5sum -c ${id}.fastq.gz.md5
         """
     } else {
         """
         wget \\
             $args \\
-            -O ${meta.id}_1.fastq.gz \\
-            ${fastq[0]}
+            -O ${id}_1.fastq.gz \\
+            ${fastq_1}
 
-        echo "${meta.md5_1}  ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
-        md5sum -c ${meta.id}_1.fastq.gz.md5
+        echo "${md5_1}  ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
+        md5sum -c ${id}_1.fastq.gz.md5
 
         wget \\
             $args \\
-            -O ${meta.id}_2.fastq.gz \\
-            ${fastq[1]}
+            -O ${id}_2.fastq.gz \\
+            ${fastq_2}
 
-        echo "${meta.md5_2}  ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
-        md5sum -c ${meta.id}_2.fastq.gz.md5
+        echo "${md5_2}  ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
+        md5sum -c ${id}_2.fastq.gz.md5
         """
     }
 }
diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf
@@ -9,12 +9,14 @@ process SRA_IDS_TO_RUNINFO {
         'biocontainers/python:3.9--1' }"
 
     input:
-    val id
-    val fields
+    id      : String
+    fields  : String
 
     output:
-    path "*.tsv"       , emit: tsv
-    tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
+    file('*.runinfo.tsv')
+
+    topic:
+    [process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")] >> 'versions'
 
     script:
     def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : ''

diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf
@@ -7,16 +7,18 @@ process SRA_RUNINFO_TO_FTP {
         'biocontainers/python:3.9--1' }"
 
     input:
-    path runinfo
+    runinfo : Path // input file handed to sra_runinfo_to_ftp.py as its first argument
 
     output:
-    path "*.tsv"       , emit: tsv
-    tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
+    file('*.runinfo_ftp.tsv') // matches the <prefix>.runinfo_ftp.tsv name given as the script's second argument
+
+    topic:
+    [process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")] >> 'versions'
 
     script:
     """
     sra_runinfo_to_ftp.py \\
-        ${runinfo.join(',')} \\
-        ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv
+        ${runinfo} \\
+        ${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv
     """
 }
diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf