|
20 | 20 | ## 08/19/19 ##
|
21 | 21 | ## add a checking step for vcf before merging ##
|
22 | 22 |
|
| 23 | +## add smg recovery module, Apr 23, 2020 ## |
| 24 | + |
23 | 25 | #!/usr/bin/perl
|
24 | 26 | ##!/gscmnt/gc2525/dinglab/rmashl/Software/perl/perl-5.22.0/bin/perl
|
25 | 27 | use strict;
|
26 | 28 | use warnings;
|
27 | 29 | #use POSIX;
|
28 | 30 | use Getopt::Long;
|
29 | 31 |
|
30 |
| -my $version = 1.5; |
| 32 | +my $version = 1.6; |
31 | 33 |
|
32 | 34 | #color code
|
33 | 35 | my $red = "\e[31m";
|
|
43 | 45 | Somatic variant calling pipeline
|
44 | 46 | Pipeline version: $version
|
45 | 47 |
|
46 |
| -$yellow Usage: perl $0 --srg --step --sre --rdir --ref --log --q --mincovt --mincovn --minvaf --maxindsize --exonic |
| 48 | +$yellow Usage: perl $0 --srg --step --sre --rdir --ref --log --q --mincovt --mincovn --minvaf --maxindsize --exonic --smg |
47 | 49 |
|
48 | 50 | $normal
|
49 | 51 |
|
|
59 | 61 | <minvaf> minimum somatic vaf: default >=0.05
|
60 | 62 | <maxindsize> default <=100
|
61 | 63 | <exonic> output exonic region: 1 Yes, 0 No
|
| 64 | +<smg> use smg list for calling |
| 65 | +hg38:/gscmnt/gc2521/dinglab/mwyczalk/somatic-wrapper-data/image.data/A_Reference/GRCh38.d1.vd1/GRCh38.d1.vd1.fa |
62 | 66 |
|
63 |
| -hg38: /gscmnt/gc2521/dinglab/mwyczalk/somatic-wrapper-data/image.data/A_Reference/GRCh38.d1.vd1.fa |
| 67 | +lscc smg: /gscmnt/gc3027/dinglab/medseq/smg_database/smg.lscc.tsv |
64 | 68 |
|
65 |
| -$red [0] Run all steps |
66 | 69 | $green [1] Run streka
|
67 | 70 | $green [2] Run Varscan
|
68 | 71 | $green [3] Run Pindel
|
|
94 | 97 | my $run_dir="";
|
95 | 98 | my $log_dir="";
|
96 | 99 | my $h38_REF="";
|
| 100 | +my $db_smg=""; |
97 | 101 | #my $ref_name="";
|
98 | 102 | my $chr_status=0;
|
99 | 103 | my $maxindsize=100;
|
|
109 | 113 | "exonic=i" => \$status_exonic,
|
110 | 114 | "rdir=s" => \$run_dir,
|
111 | 115 | "ref=s" => \$h38_REF,
|
| 116 | + "smg=s" => \$db_smg, |
112 | 117 | "log=s" => \$log_dir,
|
113 | 118 | "q=s" => \$q_name,
|
114 | 119 | "mincovt=i" => \$mincov_t,
|
|
243 | 248 | #&check_input_dir($run_dir);
|
244 | 249 | # start data processing
|
245 | 250 |
|
246 |
| -if ($step_number < 12) { |
| 251 | +if ($step_number < 12 && $step_number>0) { |
247 | 252 | #begin to process each sample
|
248 | 253 | for (my $i=0;$i<@sample_dir_list;$i++) {#use the for loop instead. the foreach loop has some problem to pass the global variable $sample_name to the sub functions
|
249 | 254 | $sample_name = $sample_dir_list[$i];
|
@@ -1594,7 +1599,7 @@ sub bsub_parse_mutect{
|
1594 | 1599 | print PM "filtervcf=".$sample_full_path."/mutect1/mutect.raw.filtered.$chr.vcf\n";
|
1595 | 1600 | print PM "filtervcfsnv=".$sample_full_path."/mutect1/mutect.filter.snv.$chr.vcf\n";
|
1596 | 1601 | print PM "filtervcfindel=".$sample_full_path."/mutect1/mutect.filter.indel.$chr.vcf\n";
|
1597 |
| - print PM " ".$run_script_path."filter_mutect1.7.pl $samtools/samtools \${rawvcf} \${filtervcf} $mincov_t $mincov_n $minvaf\n"; |
| 1602 | + print PM " ".$run_script_path."filter_mutect1.8.pl $samtools/samtools \${rawvcf} \${filtervcf} $mincov_t $mincov_n $minvaf\n"; |
1598 | 1603 | # print MUTECT "java \${JAVA_OPTS} -jar "."$gatkexe3 -T SelectVariants -R $h38_REF -V \${filtervcf} -o \${filtervcfsnv} -selectType SNP -selectType MNP"."\n";
|
1599 | 1604 | # print MUTECT "java \${JAVA_OPTS} -jar "."$gatkexe3 -T SelectVariants -R $h38_REF -V \${filtervcf} -o \${filtervcfindel} -selectType INDEL"."\n";
|
1600 | 1605 | print PM "java \${JAVA_OPTS} -jar "."$mutect1 -T SelectVariants -R $h38_REF -V \${filtervcf} -o \${filtervcfsnv} -selectType SNP -selectType MNP"."\n";
|
@@ -1781,49 +1786,60 @@ sub bsub_vcf_2_maf{
|
1781 | 1786 | #print VARSCANP "#BSUB -q long\n";
|
1782 | 1787 | #print MAF "#BSUB -q research-hpc\n";
|
1783 | 1788 | #print MAF "#BSUB -w \"$hold_job_file\"","\n";
|
1784 |
| - print MAF "F_VCF_1=".$sample_full_path."/merged.filtered.withmutect.vcf\n"; |
1785 |
| -# print MAF "F_VCF_1=".$sample_full_path."/merged.withmutect.vcf\n"; |
1786 |
| -# print MAF "F_VCF_2=".$sample_full_path."/".$sample_name.".withmutect.vcf\n"; |
| 1789 | + |
| 1790 | + print MAF "F_VCF_1=".$sample_full_path."/merged.withmutect.vcf\n"; |
| 1791 | + print MAF "F_VCF_1_filtered=".$sample_full_path."/merged.filtered.withmutect.vcf\n"; |
1787 | 1792 | print MAF "F_VCF_2=".$sample_full_path."/".$sample_name.".withmutect.vcf\n";
|
| 1793 | + print MAF "F_VCF_2_filtered=".$sample_full_path."/".$sample_name.".withmutect.filtered.vcf\n"; |
1788 | 1794 | print MAF "F_VEP_1=".$sample_full_path."/merged.VEP.withmutect.vcf\n";
|
1789 |
| - print MAF "F_VEP_2=".$sample_full_path."/".$sample_name.".withmutect.vep.vcf\n"; |
| 1795 | + print MAF "F_VEP_1_filtered=".$sample_full_path."/merged.VEP.withmutect.filtered.vcf\n"; |
| 1796 | + print MAF "F_VEP_2=".$sample_full_path."/".$sample_name.".withmutect.vep.vcf\n"; |
| 1797 | + print MAF "F_VEP_2_filtered=".$sample_full_path."/".$sample_name.".withmutect.filtered.vep.vcf\n"; |
1790 | 1798 | print MAF "F_maf=".$sample_full_path."/".$sample_name.".withmutect.maf\n";
|
| 1799 | + print MAF "F_maf_filtered=".$sample_full_path."/".$sample_name.".withmutect.filtered.maf\n"; |
1791 | 1800 | print MAF "RUNDIR=".$sample_full_path."\n";
|
1792 |
| - print MAF "F_log=".$sample_full_path."/vep.merged.withmutect.log"."\n"; |
1793 |
| - |
| 1801 | + |
| 1802 | + print MAF "F_log=".$sample_full_path."/vep.merged.withmutect.log"."\n"; |
1794 | 1803 | print MAF "cat > \${RUNDIR}/vep.merged.withmutect.input <<EOF\n";
|
1795 |
| - print MAF "merged.vep.vcf = ./merged.filtered.withmutect.vcf\n"; |
1796 |
| -# print MAF "merged.vep.vcf = ./merged.withmutect.vcf\n"; |
| 1804 | + print MAF "merged.vep.vcf = ./merged.withmutect.vcf\n"; |
1797 | 1805 | print MAF "merged.vep.output = ./merged.VEP.withmutect.vcf\n";
|
1798 | 1806 | print MAF "merged.vep.vep_cmd = $vepannot\n";
|
1799 | 1807 | print MAF "merged.vep.cachedir = $vepcache\n";
|
1800 |
| - #print MERGE "merged.vep.reffasta = /gscmnt/gc2525/dinglab/rmashl/Software/bin/VEP/v85/cache/homo_sapiens/85_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa\n"; |
1801 | 1808 | print MAF "merged.vep.reffasta = $f_ref_annot\n";
|
1802 | 1809 | print MAF "merged.vep.assembly = GRCh38\n";
|
1803 | 1810 | print MAF "EOF\n";
|
1804 |
| - print MAF "rm \${F_log}\n"; |
1805 |
| -# print MAF "merged.vep.vcf = ./merged.filtered.vcf\n"; |
1806 |
| -# print MAF "merged.vep.output = ./merged.VEP.vcf\n"; |
1807 |
| -# print MAF "merged.vep.vep_cmd = $vepcmd\n"; |
1808 |
| -# print MAF "merged.vep.cachedir = $vepcache\n"; |
1809 |
| - #print MERGE "merged.vep.reffasta = /gscmnt/gc2525/dinglab/rmashl/Software/bin/VEP/v85/cache/homo_sapiens/85_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa\n"; |
1810 |
| -# print MAF "merged.vep.reffasta = $f_ref_annot\n"; |
1811 |
| - # print MAF "merged.vep.assembly = GRCh37\n"; |
1812 |
| - # print MAF "EOF\n"; |
1813 |
| - # print MERGE "else\n"; |
1814 |
| - print MAF " ".$run_script_path."vaf_filter_v1.3.pl \${RUNDIR} $minvaf $mincov_t $mincov_n $maxindsize\n"; |
1815 |
| - #print MAF " ".$run_script_path."vaf_filter_michigan_washu.pl \${RUNDIR}\n"; |
1816 |
| - #print MAF " ".$run_script_path."vaf_all_callers.pl \${RUNDIR}\n"; |
| 1811 | + print MAF "rm \${F_log}\n"; |
| 1812 | + |
| 1813 | + print MAF "F_log_filtered=".$sample_full_path."/vep.merged.withmutect.filtered.log"."\n"; |
| 1814 | + print MAF "cat > \${RUNDIR}/vep.merged.withmutect.filtered.input <<EOF\n"; |
| 1815 | + print MAF "merged.vep.vcf = ./merged.filtered.withmutect.vcf\n"; |
| 1816 | + print MAF "merged.vep.output = ./merged.VEP.withmutect.filtered.vcf\n"; |
| 1817 | + print MAF "merged.vep.vep_cmd = $vepannot\n"; |
| 1818 | + print MAF "merged.vep.cachedir = $vepcache\n"; |
| 1819 | + print MAF "merged.vep.reffasta = $f_ref_annot\n"; |
| 1820 | + print MAF "merged.vep.assembly = GRCh38\n"; |
| 1821 | + print MAF "EOF\n"; |
| 1822 | + print MAF "rm \${F_log_filtered}\n"; |
| 1823 | + |
| 1824 | + ### vep and vcf2maf annotation for all variants to get the annotated gene name for each variant ## |
1817 | 1825 | print MAF "cd \${RUNDIR}\n";
|
1818 | 1826 | print MAF ". $script_dir/set_envvars\n";
|
1819 |
| - print MAF " ".$run_script_path."vep_annotator.pl ./vep.merged.withmutect.input >&./vep.merged.withmutect.log\n"; |
| 1827 | + print MAF " ".$run_script_path."vep_annotator.pl ./vep.merged.withmutect.input >&./vep.merged.withmutect.log\n"; |
1820 | 1828 | print MAF "rm \${F_VCF_2}\n";
|
1821 | 1829 | print MAF "rm \${F_VEP_2}\n";
|
1822 | 1830 | print MAF "ln -s \${F_VCF_1} \${F_VCF_2}\n";
|
1823 | 1831 | print MAF "ln -s \${F_VEP_1} \${F_VEP_2}\n";
|
1824 |
| -# print MAF "cd |
1825 |
| -# print MAF " ".$run_script_path."vcf2maf.pl --input-vcf \${F_VCF_2} --output-maf \${F_maf} --tumor-id $sample_name\_T --normal-id $sample_name\_N --ref-fasta $f_ref_annot --filter-vcf $f_exac --file-tsl $TSL_DB\n"; |
1826 |
| - print MAF " ".$run_script_path."vcf2maf.pl --input-vcf \${F_VCF_2} --output-maf \${F_maf} --tumor-id $sample_name\_T --normal-id $sample_name\_N --ref-fasta $f_ref_annot --file-tsl $TSL_DB\n"; |
| 1832 | + print MAF " ".$run_script_path."vcf2maf.pl --input-vcf \${F_VCF_2} --output-maf \${F_maf} --tumor-id $sample_name\_T --normal-id $sample_name\_N --ref-fasta $f_ref_annot --file-tsl $TSL_DB\n"; |
| 1833 | + |
| 1834 | + ## filter the variants; for genes in the smg list, ignore the tumor vaf cutoff (default 0.05) ## |
| 1835 | + print MAF " ".$run_script_path."vaf_filter_v1.4.pl \${RUNDIR} $sample_name $minvaf $mincov_t $mincov_n $maxindsize $db_smg\n"; |
| 1836 | + |
| 1837 | + print MAF " ".$run_script_path."vep_annotator.pl ./vep.merged.withmutect.filtered.input >&./vep.merged.withmutect.filtered.log\n"; |
| 1838 | + print MAF "rm \${F_VCF_2_filtered}\n"; |
| 1839 | + print MAF "rm \${F_VEP_2_filtered}\n"; |
| 1840 | + print MAF "ln -s \${F_VCF_1_filtered} \${F_VCF_2_filtered}\n"; |
| 1841 | + print MAF "ln -s \${F_VEP_1_filtered} \${F_VEP_2_filtered}\n"; |
| 1842 | + print MAF " ".$run_script_path."vcf2maf.pl --input-vcf \${F_VCF_2_filtered} --output-maf \${F_maf_filtered} --tumor-id $sample_name\_T --normal-id $sample_name\_N --ref-fasta $f_ref_annot --file-tsl $TSL_DB\n"; |
1827 | 1843 | #print MAF " ".$run_script_path."splice_site_check.pl $sample_full_path\n";
|
1828 | 1844 | close MAF;
|
1829 | 1845 |
|
|
0 commit comments