Skip to content
This repository was archived by the owner on Oct 2, 2020. It is now read-only.

Commit 8ae6150

Browse files
committed
Merge pull request #26 from SciDAP/master
draft3 conversion and some metadata suggestions
2 parents 15f32f7 + 0084628 commit 8ae6150

23 files changed

+34072
-159
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
11
.vagrant
22
.DS_Store
33
/docker
4+
*.bam
5+
*.bai
6+
*.fa
7+
*.fastq
8+
*.bz2
9+
*.gz
10+
*.gtf
11+

tools/README

Lines changed: 0 additions & 1 deletion
This file was deleted.

tools/README.md

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
Sample CWL Command Line Tools.
2+
3+
# Testing CWLs
4+
5+
Test directory includes:
6+
* dm3_chr4.fa - Chromosome 4 of Drosophila genome
7+
* dm3_chr4.gtf - Chromosome 4 RefSeq annotation file
8+
* SRR1031972.fastq - The reduced raw reads file (reads from Chromosome 4 only, RNA-Seq data)
9+
10+
To test the tools, they have to be executed in a particular order so that each one produces the input for the next. Make ```./workflow/tools``` your current working directory
11+
and run a command like this: ```cwltool --basedir ./ ./TOOL.cwl ./jobs/TOOL-job.json```.
12+
13+
## Tools
14+
15+
Indexing genome
16+
---------------
17+
18+
The first step is to index our genome. I'm going to use *STAR genomeGenerate*. To do that
19+
I run ```cwltool --basedir ./ ./STAR.cwl ./jobs/STAR-job-index.json```. The dm3 genome will be placed into the ./test-files/dm3 directory.
20+
21+
Output from cwltool:
22+
23+
```
24+
/usr/local/bin/cwltool 1.0.20151125221324
25+
[job 4532697104] ./workflows/tools$ docker run -i --volume=./workflows/tools/test-files/dm3_chr4.gtf:/tmp/job257401672_test-files/dm3_chr4.gtf:ro --volume=./workflows/tools/test-files/dm3_chr4.fa:/tmp/job257401672_test-files/dm3_chr4.fa:ro --volume=./workflows/tools:/tmp/job_output:rw --volume=/var/folders/hx/3qsmpl9s50zdmn49jb03l_tw0000gn/T/tmpWo01Wd:/tmp/job_tmp:rw --workdir=/tmp/job_output --read-only=true --user=1000 --rm --env=TMPDIR=/tmp/job_tmp --env=PATH=/usr/local/bin/:/usr/bin:/bin scidap/star:v2.5.0a STAR --genomeDir ./test-files/dm3/ --genomeFastaFiles /tmp/job257401672_test-files/dm3_chr4.fa --outBAMcompression 10 --outSAMmode Full --outSAMtype BAM SortedByCoordinate --outStd Log --runMode genomeGenerate --runThreadN 4 --sjdbGTFfile /tmp/job257401672_test-files/dm3_chr4.gtf --sjdbOverhang 100
26+
Nov 26 17:57:46 ..... Started STAR run
27+
Nov 26 17:57:46 ... Starting to generate Genome files
28+
Nov 26 17:57:46 ... starting to sort Suffix Array. This may take a long time...
29+
Nov 26 17:57:46 ... sorting Suffix Array chunks and saving them to disk...
30+
Nov 26 17:57:47 ... loading chunks from disk, packing SA...
31+
Nov 26 17:57:47 ... Finished generating suffix array
32+
Nov 26 17:57:47 ... Generating Suffix Array index
33+
Nov 26 17:57:50 ... Completed Suffix Array index
34+
Nov 26 17:57:50 ..... Processing annotations GTF
35+
Nov 26 17:57:50 ..... Inserting junctions into the genome indices
36+
Nov 26 17:57:58 ... writing Genome to disk ...
37+
Nov 26 17:57:58 ... writing Suffix Array to disk ...
38+
Nov 26 17:57:58 ... writing SAindex to disk
39+
Nov 26 17:58:35 ..... Finished successfully
40+
Final process status is success
41+
{
42+
"indices": {
43+
"path": "./test-files/dm3//Genome",
44+
"size": 1753563,
45+
"secondaryFiles": [
46+
{
47+
"path": "./test-files/dm3//SA",
48+
"class": "File"
49+
},
50+
{
51+
"path": "./test-files/dm3//SAindex",
52+
"class": "File"
53+
},
54+
{
55+
"path": "./test-files/dm3//chrNameLength.txt",
56+
"class": "File"
57+
},
58+
{
59+
"path": "./test-files/dm3//chrLength.txt",
60+
"class": "File"
61+
},
62+
{
63+
"path": "./test-files/dm3//chrStart.txt",
64+
"class": "File"
65+
},
66+
{
67+
"path": "./test-files/dm3//geneInfo.tab",
68+
"class": "File"
69+
},
70+
{
71+
"path": "./test-files/dm3//sjdbList.fromGTF.out.tab",
72+
"class": "File"
73+
},
74+
{
75+
"path": "./test-files/dm3//chrName.txt",
76+
"class": "File"
77+
},
78+
{
79+
"path": "./test-files/dm3//exonGeTrInfo.tab",
80+
"class": "File"
81+
},
82+
{
83+
"path": "./test-files/dm3//genomeParameters.txt",
84+
"class": "File"
85+
},
86+
{
87+
"path": "./test-files/dm3//sjdbList.out.tab",
88+
"class": "File"
89+
},
90+
{
91+
"path": "./test-files/dm3//exonInfo.tab",
92+
"class": "File"
93+
},
94+
{
95+
"path": "./test-files/dm3//sjdbInfo.txt",
96+
"class": "File"
97+
},
98+
{
99+
"path": "./test-files/dm3//transcriptInfo.tab",
100+
"class": "File"
101+
}
102+
],
103+
"class": "File",
104+
"checksum": "sha1$761906d19ceb10a0e2677afdfb756c4f1ca925a1"
105+
},
106+
"aligned": null,
107+
"mappingstats": null
108+
}%
109+
```
110+
111+
Reads alignment
112+
---------------
113+
114+
To align reads run ```cwltool --basedir ./ ./STAR.cwl ./jobs/STAR-job-rna.json```
115+
116+
```
117+
/usr/local/bin/cwltool 1.0.20151126171959
118+
[job 4518994960] ./workflows/tools$ docker run -i --volume=./workflows/tools/test-files/dm3/:/tmp/job958208261_test-files/dm3/:ro --volume=./workflows/tools/test-files/SRR1031972.fastq:/tmp/job958208261_test-files/SRR1031972.fastq:ro --volume=./workflows/tools:/tmp/job_output:rw --volume=/var/folders/hx/3qsmpl9s50zdmn49jb03l_tw0000gn/T/tmptuCn_d:/tmp/job_tmp:rw --workdir=/tmp/job_output --read-only=true --user=1000 --rm --env=TMPDIR=/tmp/job_tmp --env=PATH=/usr/local/bin/:/usr/bin:/bin scidap/star:v2.5.0a STAR --genomeDir /tmp/job958208261_test-files/dm3/ --outBAMcompression 10 --outFileNamePrefix ./test-files/SRR1031972. --outSAMmode Full --outSAMtype BAM SortedByCoordinate --outStd Log --readFilesIn /tmp/job958208261_test-files/SRR1031972.fastq --runMode alignReads --runThreadN 4
119+
Nov 26 19:27:58 ..... Started STAR run
120+
Nov 26 19:27:58 ..... Loading genome
121+
Nov 26 19:28:08 ..... Started mapping
122+
Nov 26 19:29:34 ..... Started sorting BAM
123+
Nov 26 19:29:36 ..... Finished successfully
124+
Final process status is success
125+
{
126+
"indices": null,
127+
"aligned": {
128+
"path": "./workflows/tools/./test-files/SRR1031972.Aligned.sortedByCoord.out.bam",
129+
"size": 22139153,
130+
"secondaryFiles": [
131+
{
132+
"path": "./test-files/SRR1031972.Log.final.out",
133+
"class": "File"
134+
},
135+
{
136+
"path": "./test-files/SRR1031972.SJ.out.tab",
137+
"class": "File"
138+
},
139+
{
140+
"path": "./test-files/SRR1031972.Log.out",
141+
"class": "File"
142+
}
143+
],
144+
"class": "File",
145+
"checksum": "sha1$ba38fcd1f238553d244f339d1147cd591324e207"
146+
},
147+
"mappingstats": "[{\"Started job on \":\"Nov 26 19:27:58\"},{\"Started mapping on \":\"Nov 26 19:28:08\"},{\"Finished on \":\"Nov 26 19:29:36\"},{\"Mapping speed, Million of reads per hour \":\"8.55\"},{\"Number of input reads \":\"209081\"},{\"Average input read length \":\"40\"},{\"Uniquely mapped reads number \":\"64313\"},{\"Uniquely mapped reads % \":\"30.76%\"},{\"Average mapped length \":\"38.19\"},{\"Number of splices: Total \":\"14213\"},{\"Number of splices: Annotated (sjdb) \":\"1640\"},{\"Number of splices: GT/AG \":\"12072\"},{\"Number of splices: GC/AG \":\"299\"},{\"Number of splices: AT/AC \":\"3\"},{\"Number of splices: Non-canonical \":\"1839\"},{\"Mismatch rate per base, % \":\"1.86%\"},{\"Deletion rate per base \":\"0.00%\"},{\"Deletion average length \":\"1.25\"},{\"Insertion rate per base \":\"0.00%\"},{\"Insertion average length \":\"1.03\"},{\"Number of reads mapped to multiple loci \":\"144768\"},{\"% of reads mapped to multiple loci \":\"69.24%\"},{\"Number of reads mapped to too many loci \":\"0\"},{\"% of reads mapped to too many loci \":\"0.00%\"},{\"% of reads unmapped: too many mismatches \":\"0.00%\"},{\"% of reads unmapped: too short \":\"0.00%\"},{\"% of reads unmapped: other \":\"0.00%\"}]"
148+
}%
149+
```
150+
151+
Indexing .bam file
152+
------------------
153+
154+
To index the .bam file run ```cwltool --basedir ./ --outdir ./test-files ./samtools-index.cwl ./jobs/samtools-index-job.json```
155+
156+
Result:
157+
```json
158+
{
159+
"sorted": {
160+
"path": "././test-files/SRR1031972.Aligned.sortedByCoord.out.bam.bai",
161+
"size": 40528,
162+
"class": "File",
163+
"checksum": "sha1$83738ffada23f654ba1f471973c7dccceb14cffc"
164+
}
165+
}
166+
```
167+
168+
Genome coverage
169+
---------------
170+
171+
To create a genome coverage .bedGraph file run ```cwltool --basedir ./ ./bedtools-genomecov.cwl ./jobs/bedtools-genomecov-job.json```
172+
173+
Result:
174+
```json
175+
{
176+
"genomecoverage": {
177+
"path": "./workflows/tools/./test-files/SRR1031972.bedGraph",
178+
"size": 1423143,
179+
"class": "File",
180+
"checksum": "sha1$dd87be96fc201734c2e5017f86e056b4bb0b2b3f"
181+
}
182+
}
183+
```
184+
185+
Sort bedGraph
186+
-------------
187+
188+
To sort the .bedGraph file by its first and second columns run ```cwltool --basedir ./ --outdir ./test-files ./linux-sort.cwl ./jobs/linux-sort-job.json```
189+
190+
Result:
191+
```json
192+
{
193+
"sorted": {
194+
"path": "././test-files/SRR1031972.bedGraph.sorted",
195+
"size": 1423143,
196+
"class": "File",
197+
"checksum": "sha1$dd87be96fc201734c2e5017f86e056b4bb0b2b3f"
198+
}
199+
}
200+
```
201+
202+
bedGraph to bigWig
203+
------------------
204+
205+
To produce the final .bigWig file run ```cwltool --basedir ./ ./ucsc-bedGraphToBigWig.cwl ./jobs/ucsc-bedGraphToBigWig-job.json```
206+
207+
Result:
208+
209+
```json
210+
{
211+
"bigWigOut": {
212+
"path": "./workflows/tools/./test-files/SRR1031972.bigWig",
213+
"size": 500098,
214+
"class": "File",
215+
"checksum": "sha1$5a0332a2fce2303f135439d377f8b7420878a7b5"
216+
}
217+
}
218+
```
219+

tools/STAR-job-index.json

Lines changed: 0 additions & 11 deletions
This file was deleted.

tools/STAR-job-rna.json

Lines changed: 0 additions & 6 deletions
This file was deleted.

tools/STAR.cwl

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env cwl-runner
2-
# "@base": "https://w3id.org/cwl/cwl#"
32

43
"@context":
4+
"cwl": "https://w3id.org/cwl/cwl#"
55
"foaf": "http://xmlns.com/foaf/0.1/"
66
"doap": "http://usefulinc.com/ns/doap"
77
"adms": "http://purl.org/adms/"
@@ -30,7 +30,7 @@ adms:Asset:
3030
- foaf:title: "(Dobin et al., 2013) STAR: ultrafast universal RNA-seq aligner. Bioinformatics."
3131
foaf:homepage: "http://www.ncbi.nlm.nih.gov/pubmed/23104886"
3232
doap:developer:
33-
- foaf:Person:
33+
foaf:Person:
3434
foaf:name: "Alexander Dobin"
3535
foaf:mbox: "mailto:dobin at cshl.edu"
3636
foaf:fundedBy: "This work was funded by NHGRI (NIH) grant U54HG004557"
@@ -643,7 +643,6 @@ inputs:
643643
- 'null'
644644
- string
645645
description: |
646-
None
647646
string: output of unmapped reads in the SAM format
648647
None ... no output
649648
Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)
@@ -1618,23 +1617,31 @@ outputs:
16181617
type: ["null",File]
16191618
outputBinding:
16201619
glob: |
1621-
${
1622-
if (inputs.runMode == "genomeGenerate")
1623-
return inputs.genomeDir+"/Genome";
1620+
${
1621+
if (inputs.runMode != "genomeGenerate")
16241622
return [];
1625-
}
1623+
return inputs.genomeDir+"/Genome";
1624+
}
16261625
secondaryFiles: |
1627-
${
1628-
if (inputs.runMode != "genomeGenerate")
1629-
return [];
1630-
1631-
var p=inputs.genomeDir;
1632-
return [
1633-
{"path": p+"/SA", "class":"File"},
1634-
{"path": p+"/SAindex", "class":"File"},
1635-
{"path": p+"/chrNameLength.txt", "class":"File"}
1636-
];
1637-
}
1626+
${
1627+
var p=inputs.genomeDir;
1628+
return [
1629+
{"path": p+"/SA", "class":"File"},
1630+
{"path": p+"/SAindex", "class":"File"},
1631+
{"path": p+"/chrNameLength.txt", "class":"File"},
1632+
{"path": p+"/chrLength.txt", "class":"File"},
1633+
{"path": p+"/chrStart.txt", "class":"File"},
1634+
{"path": p+"/geneInfo.tab", "class":"File"},
1635+
{"path": p+"/sjdbList.fromGTF.out.tab", "class":"File"},
1636+
{"path": p+"/chrName.txt", "class":"File"},
1637+
{"path": p+"/exonGeTrInfo.tab", "class":"File"},
1638+
{"path": p+"/genomeParameters.txt", "class":"File"},
1639+
{"path": p+"/sjdbList.out.tab", "class":"File"},
1640+
{"path": p+"/exonInfo.tab", "class":"File"},
1641+
{"path": p+"/sjdbInfo.txt", "class":"File"},
1642+
{"path": p+"/transcriptInfo.tab", "class":"File"}
1643+
];
1644+
}
16381645

16391646
- id: "#aligned"
16401647
type: ["null",File]
@@ -1672,11 +1679,15 @@ outputs:
16721679
${
16731680
if (inputs.runMode == "genomeGenerate")
16741681
return [];
1682+
16751683
var p = inputs.outFileNamePrefix?inputs.outFileNamePrefix:"";
16761684
return p+"Log.final.out";
16771685
}
16781686
outputEval: |
16791687
${
1688+
if (inputs.runMode == "genomeGenerate")
1689+
return "";
1690+
16801691
var s = self[0].contents.replace(/[ ]+.*?:\n|[ ]{2,}|\n$/g,"").
16811692
split(/\n{1,2}/g).map(function(v){var s=v.split(/\|\t/g); var o={}; o[s[0]]=s[1]; return o;})
16821693
return JSON.stringify(s);

tools/bedtools-genomecov-job.json

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)