Skip to content

Commit e189463

Browse files
committed
Add label to processes denoting expected speed
1 parent bbe3c52 commit e189463

File tree

7 files changed

+74
-45
lines changed

7 files changed

+74
-45
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
work
44
.DS_Store
55
autodiff-experiments.iml
6+
.Rproj.user

README.md

+8
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ the gradient using the [BEAGLE] library. BITO is only available in treeflow and
2020
You will need to install [nextflow](https://www.nextflow.io) and [docker](https://www.docker.com) to run this benchmark.
2121
Docker is not required but it is highly recommended to use it due to the numerous dependencies.
2222

23+
## Installation
24+
25+
git clone 4ment/autodiff-experiments.git
26+
27+
### Initialize treetime_validation
28+
29+
git submodule update --init --recursive
30+
2331
## Running the pipeline with docker
2432

2533
nextflow run 4ment/autodiff-experiments -profile docker

bin/physher-parser.py

-39
This file was deleted.

main.nf

+26-5
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,21 @@
22

33
nextflow.enable.dsl = 2
44

5+
params.reuse = false
56
params.results = "results"
67
params.enable_beast = false
7-
params.subtrees_alignment = "$baseDir/treetime_validation/resources/flu_H3N2/H3N2_HA_2011_2013.fasta"
88

99
include { treetime_validation } from "./modules/treetime_validation.nf" addParams(base: "$baseDir/treetime_validation")
1010
include { micro } from "./modules/micro.nf"
1111
include { macro_flu } from "./modules/macro_flu.nf"
1212

13+
dataset = "${baseDir}/treetime_validation/flu_H3N2/subtree_samples/dataset"
14+
subtrees_alignment = "$baseDir/treetime_validation/resources/flu_H3N2/H3N2_HA_2011_2013.fasta"
15+
16+
1317
process RUN_LSD {
18+
label 'ultrafast'
19+
1420
input:
1521
tuple val(size),
1622
val(rep),
@@ -32,6 +38,8 @@ process RUN_LSD {
3238
}
3339

3440
process CONVERT_LSD_NEXUS_TO_NEWICK {
41+
label 'ultrafast'
42+
3543
input:
3644
tuple val(size), val(rep), path(lsd_nexus)
3745
output:
@@ -55,6 +63,7 @@ def group_per_size_rep(newick_ch, create_sub_ch) {
5563
}
5664

5765
process CREATE_SUB_FILES {
66+
label 'ultrafast'
5867

5968
input:
6069
tuple val(size), val(rep), path(lsd_dates), path(newick_file)
@@ -66,7 +75,7 @@ process CREATE_SUB_FILES {
6675
path("H3N2_HA_2011_2013_${size}_${rep}.lsd_dates.new.txt")
6776
"""
6877
helper.py 0 \
69-
$params.subtrees_alignment \
78+
$subtrees_alignment \
7079
$lsd_dates \
7180
$newick_file \
7281
H3N2_HA_2011_2013_${size}_${rep}.new.nwk \
@@ -77,9 +86,21 @@ process CREATE_SUB_FILES {
7786

7887

7988
workflow {
80-
treetime_validation()
81-
82-
CREATE_SUB_FILES(treetime_validation.out)
89+
if (params.reuse) {
90+
subsets_ch = Channel.of(20, 50, 100, 200, 500, 750, 1000, 1250, 1500, 2000)
91+
replicates_ch = Channel.of(0..5)
92+
ch = subsets_ch.combine(replicates_ch)
93+
tt_ch = ch.map {
94+
tuple(it[0], it[1],
95+
file("${dataset}/LSD_out/H3N2_HA_2011_2013_${it[0]}_${it[1]}.lsd_dates.txt"),
96+
file("${dataset}/subtrees/H3N2_HA_2011_2013_${it[0]}_${it[1]}.nwk"))
97+
}
98+
} else {
99+
treetime_validation()
100+
tt_ch = treetime_validation.out
101+
}
102+
103+
CREATE_SUB_FILES(tt_ch)
83104

84105
RUN_LSD(CREATE_SUB_FILES.out)
85106

modules/macro_flu.nf

+31
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,37 @@ process RUN_TREEFLOW {
160160
-n ${params.iterations} > out.txt ; } 2> treeflow.${size}.${rep}.log
161161
"""
162162
}
163+
164+
process COMBIME_TIME_LOG {
165+
publishDir "$params.results/macro/", mode: 'copy'
166+
167+
input:
168+
path files
169+
output:
170+
path("macro.csv")
171+
172+
"""
173+
#!/usr/bin/env python
174+
import re
175+
176+
pattern_time = re.compile(r'Time: (\\d+\\.\\d+)')
177+
with open('macro.csv', 'w') as fpo:
178+
for file_path in ${files}:
179+
with open(file_path, 'r') as fp:
180+
for line in fp:
181+
line = line.rstrip('\\n').rstrip('\\r')
182+
mt = pattern_time.match(line)
183+
if mt:
184+
total_time = mt.group(1)
185+
a = file_path.rstrip('.log').split('.')
186+
if a[0] == 'torchtree':
187+
if a[1] == 'true':
188+
a[0] = 'bitorch'
189+
del a[1]
190+
fpo.write(a.join(',') + '\n')
191+
"""
192+
}
193+
163194
workflow macro_flu {
164195
take:
165196
data

modules/micro.nf

+7-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ params.results = "results"
88
phylox = Channel.of("torchtree", "bitorch", "phylojax")
99

1010
process RUN_PHYSHER_BENCHMARK {
11+
label 'fast'
12+
1113
publishDir "$params.results/micro/physher", mode: 'copy'
1214

1315
input:
@@ -27,6 +29,7 @@ process RUN_PHYSHER_BENCHMARK {
2729
}
2830

2931
process RUN_PHYLOX_BENCHMARK {
32+
label 'normal'
3033
label 'bito'
3134

3235
publishDir "$params.results/micro/${phylox}", mode: 'copy'
@@ -54,6 +57,7 @@ process RUN_PHYLOX_BENCHMARK {
5457
}
5558

5659
process RUN_TREEFLOW_BENCHMARK {
60+
label 'normal'
5761
label 'bito'
5862

5963
publishDir "$params.results/micro/treeflow", mode: 'copy'
@@ -76,6 +80,8 @@ process RUN_TREEFLOW_BENCHMARK {
7680
}
7781

7882
process COMBIME_CSV {
83+
label 'ultrafast'
84+
7985
publishDir "$params.results/micro/", mode: 'copy'
8086

8187
input:
@@ -85,7 +91,7 @@ process COMBIME_CSV {
8591

8692
"""
8793
head -n1 ${files[0]} > micro.csv
88-
tail -q -n+2 *.csv >> micro.csv
94+
tail -q -n+2 *[0-9].csv >> micro.csv
8995
"""
9096
}
9197

nextflow.config

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ manifest {
66
mainScript = 'main.nf'
77
}
88

9+
executor.cpus = 1
910
profiles {
1011
docker {
1112
process.container = '4ment/autodiff-experiments'

0 commit comments

Comments (0)