Skip to content

Proposal: Static types #309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: preview-25-04
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 42 additions & 30 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,39 @@
----------------------------------------------------------------------------------------
*/

nextflow.preview.output = true
nextflow.preview.types = true

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS / TYPES
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { SRA } from './workflows/sra'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline'
include { softwareVersionsToYAML } from './subworkflows/nf-core/utils_nfcore_pipeline'
include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline'
include { Sample } from './workflows/sra'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NAMED WORKFLOWS FOR PIPELINE
WORKFLOW INPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// WORKFLOW: Run main nf-core/fetchngs analysis pipeline depending on type of identifier provided
//
workflow NFCORE_FETCHNGS {
params {

take:
ids // channel: database ids read in from --input
// Set of SRA/ENA/GEO/DDBJ identifiers to download their associated metadata and FastQ files
input: Path

main:
// Comma-separated list of ENA metadata fields to fetch before downloading data.
ena_metadata_fields: String = ''

//
// WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids
//
SRA ( ids )
// Only download metadata for public data database ids and don't download the FastQ files.
skip_fastq_download: Boolean = false

emit:
samples = SRA.out.samples
metadata = SRA.out.metadata
// dbGaP repository key.
dbgap_key: Path?
}

/*
Expand All @@ -60,42 +56,59 @@ workflow {
//
// SUBWORKFLOW: Run initialisation tasks
//
PIPELINE_INITIALISATION (
ids = PIPELINE_INITIALISATION (
params.version,
params.validate_params,
params.monochrome_logs,
args,
params.outdir,
workflow.outputDir,
params.input,
params.ena_metadata_fields
)

//
// WORKFLOW: Run primary workflows for the pipeline
//
NFCORE_FETCHNGS (
PIPELINE_INITIALISATION.out.ids
sra = SRA (
Channel.fromList(ids),
[
ena_metadata_fields: params.ena_metadata_fields,
skip_fastq_download: params.skip_fastq_download,
dbgap_key: params.dbgap_key
]
)

//
// SUBWORKFLOW: Collect software versions
//
versions = SOFTWARE_VERSIONS()

//
// SUBWORKFLOW: Run completion tasks
//
PIPELINE_COMPLETION (
params.email,
params.email_on_fail,
params.plaintext_email,
params.outdir,
workflow.outputDir,
params.monochrome_logs,
params.hook_url
)

publish:
samples = NFCORE_FETCHNGS.out.samples
metadata = NFCORE_FETCHNGS.out.metadata
versions = softwareVersionsToYAML()
samples = sra.samples
metadata = sra.metadata
versions = versions
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WORKFLOW OUTPUTS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

output {
samples {
samples: Channel<Sample> {
path { sample ->
sample.fastq_1 >> 'fastq/'
sample.fastq_2 >> 'fastq/'
Expand All @@ -107,12 +120,11 @@ output {
}
}

metadata {
metadata: Channel<Path> {
path 'metadata'
}

versions {
path '.'
versions: Map<String,Map<String,String>> {
index {
path 'nf_core_fetchngs_software_mqc_versions.yml'
}
Expand Down
42 changes: 26 additions & 16 deletions modules/local/aspera_cli/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process ASPERA_CLI {
tag "$meta.id"
tag "$id"
label 'process_medium'

conda "${moduleDir}/environment.yml"
Expand All @@ -8,29 +8,39 @@ process ASPERA_CLI {
'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }"

input:
tuple val(meta), val(fastq)
val user
id : String
single_end : Boolean
fastq_aspera : String
md5_1 : String
md5_2 : String?
user : String

output:
tuple val(meta), path("*fastq.gz"), emit: fastq
tuple val(meta), path("*md5") , emit: md5
tuple val("${task.process}"), val('aspera_cli'), eval('ascli --version'), topic: versions
id : String = id
fastq_1 : Path = file('*_1.fastq.gz')
fastq_2 : Path? = file('*_2.fastq.gz')
md5_1 : Path = file('*_1.fastq.gz.md5')
md5_2 : Path? = file('*_2.fastq.gz.md5')

topic:
[process: task.process, name: 'aspera_cli', version: eval('ascli --version')] >> 'versions'

script:
def args = task.ext.args ?: ''
def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : ""
if (meta.single_end) {
def fastq = fastq_aspera.tokenize(';')
if (single_end) {
"""
$conda_prefix

ascp \\
$args \\
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
${user}@${fastq[0]} \\
${meta.id}.fastq.gz
${id}.fastq.gz

echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
md5sum -c ${meta.id}.fastq.gz.md5
echo "${md5_1} ${id}.fastq.gz" > ${id}.fastq.gz.md5
md5sum -c ${id}.fastq.gz.md5
"""
} else {
"""
Expand All @@ -40,19 +50,19 @@ process ASPERA_CLI {
$args \\
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
${user}@${fastq[0]} \\
${meta.id}_1.fastq.gz
${id}_1.fastq.gz

echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
md5sum -c ${meta.id}_1.fastq.gz.md5
echo "${md5_1} ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
md5sum -c ${id}_1.fastq.gz.md5

ascp \\
$args \\
-i \$CONDA_PREFIX/etc/aspera/aspera_bypass_dsa.pem \\
${user}@${fastq[1]} \\
${meta.id}_2.fastq.gz
${id}_2.fastq.gz

echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
md5sum -c ${meta.id}_2.fastq.gz.md5
echo "${md5_2} ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
md5sum -c ${id}_2.fastq.gz.md5
"""
}
}
46 changes: 28 additions & 18 deletions modules/local/sra_fastq_ftp/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

process SRA_FASTQ_FTP {
tag "$meta.id"
tag "$id"
label 'process_low'
label 'error_retry'

Expand All @@ -10,42 +10,52 @@ process SRA_FASTQ_FTP {
'biocontainers/wget:1.21.4' }"

input:
tuple val(meta), val(fastq)
id : String
single_end : Boolean
fastq_1 : String
fastq_2 : String?
md5_1 : String
md5_2 : String?

output:
tuple val(meta), path("*fastq.gz"), emit: fastq
tuple val(meta), path("*md5") , emit: md5
tuple val("${task.process}"), val('wget'), eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')"), topic: versions
id : String = id
fastq_1 : Path = file('*_1.fastq.gz')
fastq_2 : Path? = file('*_2.fastq.gz')
md5_1 : Path = file('*_1.fastq.gz.md5')
md5_2 : Path? = file('*_2.fastq.gz.md5')

topic:
[process: task.process, name: 'wget', version: eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')")] >> 'versions'

script:
def args = task.ext.args ?: ''
if (meta.single_end) {
if (single_end) {
"""
wget \\
$args \\
-O ${meta.id}.fastq.gz \\
${fastq[0]}
-O ${id}.fastq.gz \\
${fastq_1}

echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5
md5sum -c ${meta.id}.fastq.gz.md5
echo "${md5_1} ${id}.fastq.gz" > ${id}.fastq.gz.md5
md5sum -c ${id}.fastq.gz.md5
"""
} else {
"""
wget \\
$args \\
-O ${meta.id}_1.fastq.gz \\
${fastq[0]}
-O ${id}_1.fastq.gz \\
${fastq_1}

echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5
md5sum -c ${meta.id}_1.fastq.gz.md5
echo "${md5_1} ${id}_1.fastq.gz" > ${id}_1.fastq.gz.md5
md5sum -c ${id}_1.fastq.gz.md5

wget \\
$args \\
-O ${meta.id}_2.fastq.gz \\
${fastq[1]}
-O ${id}_2.fastq.gz \\
${fastq_2}

echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5
md5sum -c ${meta.id}_2.fastq.gz.md5
echo "${md5_2} ${id}_2.fastq.gz" > ${id}_2.fastq.gz.md5
md5sum -c ${id}_2.fastq.gz.md5
"""
}
}
10 changes: 6 additions & 4 deletions modules/local/sra_ids_to_runinfo/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ process SRA_IDS_TO_RUNINFO {
'biocontainers/python:3.9--1' }"

input:
val id
val fields
id : String
fields : String

output:
path "*.tsv" , emit: tsv
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
file('*.runinfo.tsv')

topic:
[process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")] >> 'versions'

script:
def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : ''
Expand Down
12 changes: 7 additions & 5 deletions modules/local/sra_runinfo_to_ftp/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ process SRA_RUNINFO_TO_FTP {
'biocontainers/python:3.9--1' }"

input:
path runinfo
runinfo : Path

output:
path "*.tsv" , emit: tsv
tuple val("${task.process}"), val('python'), eval("python --version | sed 's/Python //g'"), topic: versions
file('*.runinfo_ftp.tsv')

topic:
[process: task.process, name: 'python', version: eval("python --version | sed 's/Python //g'")] >> 'versions'

script:
"""
sra_runinfo_to_ftp.py \\
${runinfo.join(',')} \\
${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv
${runinfo} \\
${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv
"""
}
9 changes: 4 additions & 5 deletions modules/nf-core/custom/sratoolsncbisettings/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading