comparison strelka_somatic.xml @ 0:c06e033242df draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
author iuc
date Wed, 27 Jan 2021 14:46:27 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c06e033242df
1 <?xml version="1.0"?>
2 <tool id="strelka_somatic" name="Strelka Somatic" version="@TOOL_VERSION@+@GALAXY_VERSION@">
3 <description>@DESCRIPTION@ for somatic variation in tumor/normal sample pairs</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 ## Stage inputs: symlink each sample and its index under a fixed name whose extension (.bam/.bam.bai or .cram/.cram.crai) matches the dataset type
10 #if $normalBam.is_of_type('bam')
11 ln -s '$normalBam' './input_normal.bam' &&
12 ln -s '$normalBam.metadata.bam_index' './input_normal.bam.bai' &&
13 #elif $normalBam.is_of_type('cram')
14 ln -s '$normalBam' './input_normal.cram' &&
15 ln -s '$normalBam.metadata.cram_index' './input_normal.cram.crai' &&
16 #end if
17 #if $tumorBam.is_of_type('bam')
18 ln -s '$tumorBam' './input_tumor.bam' &&
19 ln -s '$tumorBam.metadata.bam_index' './input_tumor.bam.bai' &&
20 #elif $tumorBam.is_of_type('cram')
21 ln -s '$tumorBam' './input_tumor.cram' &&
22 ln -s '$tumorBam.metadata.cram_index' './input_tumor.cram.crai' &&
23 #end if
24 @INIT@
25
26 ## Configure the workflow: pass the staged normal/tumor symlinks and, when enabled, the --outputCallableRegions flag; remaining options come from the CREATE macro token
27 configureStrelkaSomaticWorkflow.py
28 #if $normalBam.is_of_type('bam')
29 --normalBam ./input_normal.bam
30 #elif $normalBam.is_of_type('cram')
31 --normalBam ./input_normal.cram
32 #end if
33 #if $tumorBam.is_of_type('bam')
34 --tumorBam ./input_tumor.bam
35 #elif $tumorBam.is_of_type('cram')
36 --tumorBam ./input_tumor.cram
37 #end if
38 $oo.outputCallableRegions
39 @CREATE@
40
41 ## Run the workflow; the run command itself is expanded from the RUN macro token
42 @RUN@
43
44 ## Collect results: the VCFs are decompressed only when "decompressed" output was requested, then moved to the output datasets; the callable-regions BED, if requested, is always decompressed before being moved
45 #if $oo.vcf_type == "decompressed"
46 && bgzip -d results/results/variants/somatic.indels.vcf.gz
47 && bgzip -d results/results/variants/somatic.snvs.vcf.gz
48 && mv results/results/variants/somatic.indels.vcf '$out_indels'
49 && mv results/results/variants/somatic.snvs.vcf '$out_snvs'
50 #else
51 && mv results/results/variants/somatic.indels.vcf.gz '$out_indels'
52 && mv results/results/variants/somatic.snvs.vcf.gz '$out_snvs'
53 #end if
54 #if $oo.outputCallableRegions
55 && bgzip -d results/results/regions/somatic.callable.regions.bed.gz
56 && mv results/results/regions/somatic.callable.regions.bed '$out_callable'
57 #end if
58
59 ]]></command>
60 <configfiles>
61 <configfile name="config_file">
62 ## Keep every entry of this file flush-left: the parser that reads it cannot handle indented lines
63 [StrelkaSomatic]
64 depthFilterMultiple = $strelka.depthFilterMultiple
65 snvMaxFilteredBasecallFrac = $strelka.snvMaxFilteredBasecallFrac
66 snvMaxSpanningDeletionFrac = $strelka.snvMaxSpanningDeletionFrac
67 indelMaxWindowFilteredBasecallFrac = $strelka.indelMaxWindowFilteredBasecallFrac
68 ssnvPrior = $strelka.ssnvPrior
69 sindelPrior = $strelka.sindelPrior
70 ssnvNoise = $strelka.ssnvNoise
71 sindelNoiseFactor = $strelka.sindelNoiseFactor
72 ssnvNoiseStrandBiasFrac = $strelka.ssnvNoiseStrandBiasFrac
73 minTier1Mapq = $strelka.minTier1Mapq
74 minTier2Mapq = $strelka.minTier2Mapq
75 ssnvQuality_LowerBound = $strelka.ssnvQuality_LowerBound
76 sindelQuality_LowerBound = $strelka.sindelQuality_LowerBound
77 ssnvContamTolerance = $strelka.ssnvContamTolerance
78 indelContamTolerance = $strelka.indelContamTolerance
79 @CONFIG@
80 </configfile>
81 </configfiles>
82 <inputs>
83 <param argument="--normalBam" type="data" format="bam,cram" multiple="false" label="Select normal sample file" help="In bam or cram format."/>
84 <param argument="--tumorBam" type="data" format="bam,cram" multiple="false" label="Select tumor sample file" help="In bam or cram format."/>
85 <expand macro="input_required"/>
86 <expand macro="calling_model" />
87 <expand macro="calling_model_expert" />
88 <expand macro="regions_select" />
89 <!-- Output options: the VCF type choice (presumably supplied by the input_output macro) and the optional callable-regions BED track -->
90 <section name="oo" title="Output options" expanded="false">
91 <expand macro="input_output"/>
92 <param argument="--outputCallableRegions" type="boolean" checked="false" truevalue="--outputCallableRegions" falsevalue="" label="Generate bed file describing somatic callable regions of the genome" help=""/>
93 </section>
94 <!-- Each parameter declared directly in this section is written to the [StrelkaSomatic] section of the config_file configfile -->
95 <section name="strelka" title="Strelka run configuration" expanded="false">
96 <expand macro="input_strelka"/>
97 <param argument="depthFilterMultiple" type="float" value="3.0" label="Set depthFilterMultiple" help="If the depth filter is not skipped, all variants which occur at a depth greater than depthFilterMultiple*chromosome mean depth will be filtered out."/>
98 <param argument="snvMaxFilteredBasecallFrac" type="float" value="0.4" min="0.0" max="1.0" label="Set snvMaxFilteredBasecallFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of basecalls have been removed by the mismatch density filter in either sample."/>
99 <param argument="snvMaxSpanningDeletionFrac" type="float" value="0.75" min="0.0" max="1.0" label="Set snvMaxSpanningDeletionFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of overlapping reads contain deletions which span the SNV call site."/>
100 <param argument="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" min="0.0" max="1.0" label="Set indelMaxWindowFilteredBasecallFrac" help="Somatic indel calls are filtered if greater than this fraction of basecalls in a window extending 50 bases to each side of an indel's call position have been removed by the mismatch density filter."/>
101 <param argument="ssnvPrior" type="float" value="0.0001" min="0.0" label="Set ssnvPrior" help="Prior probability of a somatic SNV."/>
102 <param argument="sindelPrior" type="float" value="0.000001" min="0.0" label="Set sindelPrior" help="Prior probability of a somatic indel."/>
103 <param argument="ssnvNoise" type="float" value="0.0000000005" min="0.0" label="Set ssnvNoise" help="Probability of an SNV noise allele. NB: in the calling model a noise allele is shared in tumor and normal samples, but occurs at any frequency."/>
104 <param argument="sindelNoiseFactor" type="float" value="2.2" label="Set sindelNoiseFactor" help="Somatic indel noise factor."/>
105 <param argument="ssnvNoiseStrandBiasFrac" type="float" value="0.0" min="0.0" max="1.0" label="Set ssnvNoiseStrandBiasFrac" help="Fraction of snv noise attributed to strand-bias. It is not recommended to change this setting. However, if it is essential to turn the strand bias penalization off, the following is recommended: Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, (1) set ssnvNoiseStrandBiasFrac = 0 (2) divide the current ssnvNoise value by 2."/>
106 <param argument="minTier1Mapq" type="integer" value="20" min="0" label="Set minTier1Mapq" help="Minimum MAPQ score for reads at tier1."/>
107 <param argument="minTier2Mapq" type="integer" value="0" min="0" label="Set minTier2Mapq" help="Minimum MAPQ score for reads at tier2."/>
108 <param argument="ssnvQuality_LowerBound" type="integer" value="15" label="Set ssnvQuality_LowerBound" help="Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are marked as filtered."/>
109 <param argument="sindelQuality_LowerBound" type="integer" value="40" label="Set sindelQuality_LowerBound" help="Somatic quality score (QSI_NT, NT=ref) below which somatic indels are marked as filtered."/>
110 <param argument="ssnvContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set ssnvContamTolerance" help="Tolerance of tumor contamination in the normal sample when calling somatic SNVs."/>
111 <param argument="indelContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set indelContamTolerance" help="Tolerance of tumor contamination in the normal sample when calling somatic indels."/>
112 </section>
113 </inputs>
114 <outputs> <!-- the two VCF datasets carry no filter; the BED dataset is created only when its filter is true -->
115 <data format="vcf" name="out_indels" label="${tool.name} on ${on_string}, Indels, vcf">
116 <change_format> <!-- datatype becomes vcf_bgzip when compressed output is selected -->
117 <when format="vcf_bgzip" input="oo.vcf_type" value="compressed"/>
118 </change_format>
119 </data>
120 <data format="vcf" name="out_snvs" label="${tool.name} on ${on_string}, SNVs, vcf">
121 <change_format> <!-- same datatype switch as for the indel VCF -->
122 <when format="vcf_bgzip" input="oo.vcf_type" value="compressed"/>
123 </change_format>
124 </data>
125 <data format="bed" name="out_callable" label="${tool.name} on ${on_string}, Callable regions, bed">
126 <filter>bool(oo['outputCallableRegions'])</filter>
127 </data>
128 </outputs>
129 <tests>
130 <!-- Test 1: BAM inputs, reference FASTA taken from the history, uncompressed VCF outputs -->
131 <test expect_num_outputs="2">
132 <param name="normalBam" ftype="bam" value="sample1.bam"/>
133 <param name="tumorBam" ftype="bam" value="sample2.bam"/>
134 <section name="oo">
135 <param name="vcf_type" value="decompressed"/>
136 </section>
137 <conditional name="ref_cond">
138 <param name="ref_sel" value="history"/>
139 <param name="ref" ftype="fasta" value="hg98.fa"/>
140 </conditional>
141 <output name="out_indels" ftype="vcf">
142 <assert_contents>
143 <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
144 <has_line_matching expression="demo20&#009;3664&#009;.+"/>
145 <has_n_lines n="41"/>
146 </assert_contents>
147 </output>
148 <output name="out_snvs" ftype="vcf">
149 <assert_contents>
150 <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
151 <has_line_matching expression="demo20&#009;3537&#009;.+"/>
152 <has_n_lines n="52"/>
153 </assert_contents>
154 </output>
155 </test>
156 <!-- Test 2: CRAM inputs, reference FASTA taken from the history, bgzip-compressed VCF outputs compared by size -->
157 <test expect_num_outputs="2">
158 <param name="normalBam" ftype="cram" value="sample1.cram"/>
159 <param name="tumorBam" ftype="cram" value="sample2.cram"/>
160 <section name="oo">
161 <param name="vcf_type" value="compressed"/>
162 </section>
163 <conditional name="ref_cond">
164 <param name="ref_sel" value="history"/>
165 <param name="ref" ftype="fasta" value="hg98.fa"/>
166 </conditional>
167 <output name="out_indels" ftype="vcf_bgzip" file="indels_test2.vcf.gz" compare="sim_size"/>
168 <output name="out_snvs" ftype="vcf_bgzip" file="snvs_test2.vcf.gz" compare="sim_size"/>
169 </test>
170 <!-- Test 3: BAM inputs, exome optimization, callable-regions BED requested, and a non-default value for every Strelka configuration parameter declared in this file -->
171 <test expect_num_outputs="3">
172 <param name="normalBam" ftype="bam" value="sample1.bam"/>
173 <param name="tumorBam" ftype="bam" value="sample2.bam"/>
174 <param name="optimization" value="--exome"/>
175 <section name="oo">
176 <param name="vcf_type" value="decompressed"/>
177 <param name="outputCallableRegions" value="true"/>
178 </section>
179 <conditional name="ref_cond">
180 <param name="ref_sel" value="history"/>
181 <param name="ref" ftype="fasta" value="hg98.fa"/>
182 </conditional>
183 <section name="strelka">
184 <param name="depthFilterMultiple" value="2.8"/>
185 <param name="snvMaxFilteredBasecallFrac" value="0.5"/>
186 <param name="snvMaxSpanningDeletionFrac" value="0.76"/>
187 <param name="indelMaxWindowFilteredBasecallFrac" value="0.4"/>
188 <param name="ssnvPrior" value="0.0002"/>
189 <param name="sindelPrior" value="0.000002"/>
190 <param name="ssnvNoise" value="0.0000000004"/>
191 <param name="sindelNoiseFactor" value="2.1"/>
192 <param name="ssnvNoiseStrandBiasFrac" value="0.1"/>
193 <param name="minTier1Mapq" value="21"/>
194 <param name="minTier2Mapq" value="1"/>
195 <param name="ssnvQuality_LowerBound" value="14"/>
196 <param name="sindelQuality_LowerBound" value="41"/>
197 <param name="ssnvContamTolerance" value="0.16"/>
198 <param name="indelContamTolerance" value="0.16"/>
199 <param name="maxIndelSize" value="50"/>
200 </section>
201 <output name="out_indels" ftype="vcf">
202 <assert_contents>
203 <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
204 <has_line_matching expression="demo20&#009;3664&#009;.+"/>
205 <has_n_lines n="39"/>
206 </assert_contents>
207 </output>
208 <output name="out_snvs" ftype="vcf">
209 <assert_contents>
210 <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
211 <has_line_matching expression="demo20&#009;3537&#009;.+"/>
212 <has_n_lines n="51"/>
213 </assert_contents>
214 </output>
215 <output name="out_callable" ftype="bed">
216 <assert_contents>
217 <has_line_matching expression="demo20&#009;3971&#009;.+"/>
218 <has_n_lines n="136"/>
219 </assert_contents>
220 </output>
221 </test>
222 <!-- Test 4: BAM inputs with the cached hg19 reference; dbkey is set on the normal sample only, presumably because the cached-reference selector is filtered by it (confirm in macros.xml) -->
223 <test expect_num_outputs="2">
224 <param name="normalBam" ftype="bam" value="sample1.bam" dbkey="hg19"/>
225 <param name="tumorBam" ftype="bam" value="sample2.bam"/>
226 <section name="oo">
227 <param name="vcf_type" value="decompressed"/>
228 </section>
229 <conditional name="ref_cond">
230 <param name="ref_sel" value="cached"/>
231 <param name="ref" value="hg19"/>
232 </conditional>
233 <output name="out_indels" ftype="vcf">
234 <assert_contents>
235 <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
236 <has_line_matching expression="demo20&#009;3664&#009;.+"/>
237 <has_n_lines n="41"/>
238 </assert_contents>
239 </output>
240 <output name="out_snvs" ftype="vcf">
241 <assert_contents>
242 <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
243 <has_line_matching expression="demo20&#009;3537&#009;.+"/>
244 <has_n_lines n="52"/>
245 </assert_contents>
246 </output>
247 </test>
248 </tests>
249 <help><![CDATA[
250 .. class:: infomark
251
252 **What it does**
253
254 @HELP_STRELKA@
255
256 The somatic calling model improves on the original Strelka method for liquid and late-stage tumor analysis by accounting for possible tumor cell contamination in the normal sample. A final empirical variant re-scoring step using random forest models trained on various call quality features has been added to both the germline and somatic callers to further improve precision.
257
258 **Input**
259
260 @HELP_INPUT@
261
262 **Output**
263
264 *Indels*
265
266 All somatic indels inferred in the tumor sample in VCF format.
267
268 *SNVs*
269
270 All somatic SNVs inferred in the tumor sample in VCF format.
271
272 *Callability*
273
274 The somatic variant caller can be configured with the option --outputCallableRegions, which will extend the somatic SNV quality model calculation to be applied as a test of somatic SNV callability at all positions in the genome. The outcome of this callability calculation will be summarized in a BED-formatted callability track. This BED track contains regions which are determined to be callable, indicating that there is sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater. Both somatic and non-somatic sites are determined to be 'callable' if the somatic or non-somatic quality threshold is at least 15.
275
276 .. class:: infomark
277
278 **References**
279
280 @HELP_REFERENCES@
281 ]]></help>
282 <expand macro="citations"/>
283 </tool>