Mercurial > repos > iuc > strelka_somatic
comparison strelka_somatic.xml @ 0:c06e033242df draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
| author | iuc |
|---|---|
| date | Wed, 27 Jan 2021 14:46:27 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c06e033242df |
|---|---|
| 1 <?xml version="1.0"?> | |
| 2 <tool id="strelka_somatic" name="Strelka Somatic" version="@TOOL_VERSION@+@GALAXY_VERSION@"> | |
| 3 <description>@DESCRIPTION@ for somatic variation in tumor/normal sample pairs</description> | |
| 4 <macros> | |
| 5 <import>macros.xml</import> | |
| 6 </macros> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 ## initialize: symlink each input and its index into the working directory under fixed names (input_normal.*, input_tumor.*) that keep the bam/cram extension | |
| 10 #if $normalBam.is_of_type('bam') | |
| 11 ln -s '$normalBam' './input_normal.bam' && | |
| 12 ln -s '$normalBam.metadata.bam_index' './input_normal.bam.bai' && | |
| 13 #elif $normalBam.is_of_type('cram') | |
| 14 ln -s '$normalBam' './input_normal.cram' && | |
| 15 ln -s '$normalBam.metadata.cram_index' './input_normal.cram.crai' && | |
| 16 #end if | |
| 17 #if $tumorBam.is_of_type('bam') | |
| 18 ln -s '$tumorBam' './input_tumor.bam' && | |
| 19 ln -s '$tumorBam.metadata.bam_index' './input_tumor.bam.bai' && | |
| 20 #elif $tumorBam.is_of_type('cram') | |
| 21 ln -s '$tumorBam' './input_tumor.cram' && | |
| 22 ln -s '$tumorBam.metadata.cram_index' './input_tumor.cram.crai' && | |
| 23 #end if | |
| 24 @INIT@ | |
| 25 | |
| 26 ## create workflow: hand the symlinked normal/tumor files to the configure script, followed by the CREATE macro token (defined outside this file) | |
| 27 configureStrelkaSomaticWorkflow.py | |
| 28 #if $normalBam.is_of_type('bam') | |
| 29 --normalBam ./input_normal.bam | |
| 30 #elif $normalBam.is_of_type('cram') | |
| 31 --normalBam ./input_normal.cram | |
| 32 #end if | |
| 33 #if $tumorBam.is_of_type('bam') | |
| 34 --tumorBam ./input_tumor.bam | |
| 35 #elif $tumorBam.is_of_type('cram') | |
| 36 --tumorBam ./input_tumor.cram | |
| 37 #end if | |
| 38 $oo.outputCallableRegions | |
| 39 @CREATE@ | |
| 40 | |
| 41 ## run workflow via the RUN macro token (defined outside this file); the steps after it are chained on with a leading && | |
| 42 @RUN@ | |
| 43 | |
| 44 ## decompress results if needed and move everything to final destinations (the callable-regions BED is always decompressed, independent of vcf_type) | |
| 45 #if $oo.vcf_type == "decompressed" | |
| 46 && bgzip -d results/results/variants/somatic.indels.vcf.gz | |
| 47 && bgzip -d results/results/variants/somatic.snvs.vcf.gz | |
| 48 && mv results/results/variants/somatic.indels.vcf '$out_indels' | |
| 49 && mv results/results/variants/somatic.snvs.vcf '$out_snvs' | |
| 50 #else | |
| 51 && mv results/results/variants/somatic.indels.vcf.gz '$out_indels' | |
| 52 && mv results/results/variants/somatic.snvs.vcf.gz '$out_snvs' | |
| 53 #end if | |
| 54 #if $oo.outputCallableRegions | |
| 55 && bgzip -d results/results/regions/somatic.callable.regions.bed.gz | |
| 56 && mv results/results/regions/somatic.callable.regions.bed '$out_callable' | |
| 57 #end if | |
| 58 | |
| 59 ]]></command> | |
| 60 <configfiles> | |
| 61 <configfile name="config_file"> | |
| 62 ## Values are taken from the "strelka" input section. Keep every line flush left: the parser cannot handle indents | |
| 63 [StrelkaSomatic] | |
| 64 depthFilterMultiple = $strelka.depthFilterMultiple | |
| 65 snvMaxFilteredBasecallFrac = $strelka.snvMaxFilteredBasecallFrac | |
| 66 snvMaxSpanningDeletionFrac = $strelka.snvMaxSpanningDeletionFrac | |
| 67 indelMaxWindowFilteredBasecallFrac = $strelka.indelMaxWindowFilteredBasecallFrac | |
| 68 ssnvPrior = $strelka.ssnvPrior | |
| 69 sindelPrior = $strelka.sindelPrior | |
| 70 ssnvNoise = $strelka.ssnvNoise | |
| 71 sindelNoiseFactor = $strelka.sindelNoiseFactor | |
| 72 ssnvNoiseStrandBiasFrac = $strelka.ssnvNoiseStrandBiasFrac | |
| 73 minTier1Mapq = $strelka.minTier1Mapq | |
| 74 minTier2Mapq = $strelka.minTier2Mapq | |
| 75 ssnvQuality_LowerBound = $strelka.ssnvQuality_LowerBound | |
| 76 sindelQuality_LowerBound = $strelka.sindelQuality_LowerBound | |
| 77 ssnvContamTolerance = $strelka.ssnvContamTolerance | |
| 78 indelContamTolerance = $strelka.indelContamTolerance | |
| 79 @CONFIG@ | |
| 80 </configfile> | |
| 81 </configfiles> | |
| 82 <inputs> | |
| 83 <param argument="--normalBam" type="data" format="bam,cram" multiple="false" label="Select normal sample file" help="In bam or cram format."/> | |
| 84 <param argument="--tumorBam" type="data" format="bam,cram" multiple="false" label="Select tumor sample file" help="In bam or cram format."/> | |
| 85 <expand macro="input_required"/> | |
| 86 <expand macro="calling_model" /> | |
| 87 <expand macro="calling_model_expert" /> | |
| 88 <expand macro="regions_select" /> | |
| 89 | |
| 90 <section name="oo" title="Output options" expanded="false"> <!-- output-related switches; vcf_type is expected to come from the input_output macro --> | |
| 91 <expand macro="input_output"/> | |
| 92 <param argument="--outputCallableRegions" type="boolean" checked="false" truevalue="--outputCallableRegions" falsevalue="" label="Generate bed file describing somatic callable regions of the genome" help="Adds a BED track of the regions with sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater."/> | |
| 93 </section> | |
| 94 | |
| 95 <section name="strelka" title="Strelka run configuration" expanded="false"> <!-- each value below is written to the [StrelkaSomatic] section of the generated config file --> | |
| 96 <expand macro="input_strelka"/> | |
| 97 <param argument="depthFilterMultiple" type="float" value="3.0" label="Set depthFilterMultiple" help="If the depth filter is not skipped, all variants which occur at a depth greater than depthFilterMultiple*chromosome mean depth will be filtered out."/> | |
| 98 <param argument="snvMaxFilteredBasecallFrac" type="float" value="0.4" min="0.0" max="1.0" label="Set snvMaxFilteredBasecallFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of basecalls have been removed by the mismatch density filter in either sample."/> | |
| 99 <param argument="snvMaxSpanningDeletionFrac" type="float" value="0.75" min="0.0" max="1.0" label="Set snvMaxSpanningDeletionFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of overlapping reads contain deletions which span the SNV call site."/> | |
| 100 <param argument="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" min="0.0" max="1.0" label="Set indelMaxWindowFilteredBasecallFrac" help="Somatic indel calls are filtered if greater than this fraction of basecalls in a window extending 50 bases to each side of an indel's call position have been removed by the mismatch density filter."/> | |
| 101 <param argument="ssnvPrior" type="float" value="0.0001" min="0.0" label="Set ssnvPrior" help="Prior probability of a somatic SNV."/> | |
| 102 <param argument="sindelPrior" type="float" value="0.000001" min="0.0" label="Set sindelPrior" help="Prior probability of a somatic indel."/> | |
| 103 <param argument="ssnvNoise" type="float" value="0.0000000005" min="0.0" label="Set ssnvNoise" help="Probability of an snv or indel noise allele. NB: in the calling model a noise allele is shared in tumor and normal samples, but occurs at any frequency."/> | |
| 104 <param argument="sindelNoiseFactor" type="float" value="2.2" label="Set sindelNoiseFactor" help="Somatic indel noise factor."/> | |
| 105 <param argument="ssnvNoiseStrandBiasFrac" type="float" value="0.0" min="0.0" max="1.0" label="Set ssnvNoiseStrandBiasFrac" help="Fraction of snv noise attributed to strand-bias. It is not recommended to change this setting. However, if it is essential to turn the strand bias penalization off, the following is recommended: Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, (1) set ssnvNoiseStrandBiasFrac = 0 (2) divide the current ssnvNoise value by 2."/> | |
| 106 <param argument="minTier1Mapq" type="integer" value="20" label="Set minTier1Mapq" help="Minimum MAPQ score for reads at tier1."/> | |
| 107 <param argument="minTier2Mapq" type="integer" value="0" label="Set minTier2Mapq" help="Minimum MAPQ score for reads at tier2."/> | |
| 108 <param argument="ssnvQuality_LowerBound" type="integer" value="15" label="Set ssnvQuality_LowerBound" help="Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are marked as filtered."/> | |
| 109 <param argument="sindelQuality_LowerBound" type="integer" value="40" label="Set sindelQuality_LowerBound" help="Somatic quality score (QSI_NT, NT=ref) below which somatic indels are marked as filtered."/> | |
| 110 <param argument="ssnvContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set ssnvContamTolerance" help="Tolerance of tumor contamination in the normal sample when calling somatic SNVs."/> | |
| 111 <param argument="indelContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set indelContamTolerance" help="Tolerance of tumor contamination in the normal sample when calling somatic indels."/> | |
| 112 </section> | |
| 113 </inputs> | |
| 114 <outputs> | |
| 115 <data name="out_indels" format="vcf" label="${tool.name} on ${on_string}, Indels, vcf"> <!-- somatic indel calls; plain VCF unless oo.vcf_type is "compressed" --> | |
| 116 <change_format> | |
| 117 <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" /> | |
| 118 </change_format> | |
| 119 </data> | |
| 120 <data name="out_snvs" format="vcf" label="${tool.name} on ${on_string}, SNVs, vcf"> <!-- somatic SNV calls; plain VCF unless oo.vcf_type is "compressed" --> | |
| 121 <change_format> | |
| 122 <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" /> | |
| 123 </change_format> | |
| 124 </data> | |
| 125 <data name="out_callable" format="bed" label="${tool.name} on ${on_string}, Callable regions, bed"> <!-- created only when oo.outputCallableRegions is enabled --> | |
| 126 <filter>bool(oo['outputCallableRegions'])</filter> | |
| 127 </data> | |
| 128 </outputs> | |
| 129 <tests> | |
| 130 <!-- #1; input bam, decompressed --> | |
| 131 <test expect_num_outputs="2"> | |
| 132 <param name="normalBam" value="sample1.bam" ftype="bam"/> | |
| 133 <param name="tumorBam" value="sample2.bam" ftype="bam"/> | |
| 134 <conditional name="ref_cond"> | |
| 135 <param name="ref_sel" value="history"/> | |
| 136 <param name="ref" value="hg98.fa" ftype="fasta"/> | |
| 137 </conditional> | |
| 138 <section name="oo"> | |
| 139 <param name="vcf_type" value="decompressed"/> | |
| 140 </section> | |
| 141 <output name="out_indels" ftype="vcf"> | |
| 142 <assert_contents> | |
| 143 <has_n_lines n="41"/> | |
| 144 <has_line_matching expression="#CHROM	POS	.+"/> | |
| 145 <has_line_matching expression="demo20	3664	.+"/> | |
| 146 </assert_contents> | |
| 147 </output> | |
| 148 <output name="out_snvs" ftype="vcf"> | |
| 149 <assert_contents> | |
| 150 <has_n_lines n="52"/> | |
| 151 <has_line_matching expression="#CHROM	POS	.+"/> | |
| 152 <has_line_matching expression="demo20	3537	.+"/> | |
| 153 </assert_contents> | |
| 154 </output> | |
| 155 </test> | |
| 156 <!-- #2; input cram, compressed --> | |
| 157 <test expect_num_outputs="2"> | |
| 158 <param name="normalBam" value="sample1.cram" ftype="cram"/> | |
| 159 <param name="tumorBam" value="sample2.cram" ftype="cram"/> | |
| 160 <conditional name="ref_cond"> | |
| 161 <param name="ref_sel" value="history"/> | |
| 162 <param name="ref" value="hg98.fa" ftype="fasta"/> | |
| 163 </conditional> | |
| 164 <section name="oo"> | |
| 165 <param name="vcf_type" value="compressed"/> | |
| 166 </section> | |
| 167 <output name="out_indels" file="indels_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/> | |
| 168 <output name="out_snvs" file="snvs_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/> | |
| 169 </test> | |
| 170 <!-- #3; input bam, decompressed, non-default parameter values, callable regions output enabled --> | |
| 171 <test expect_num_outputs="3"> | |
| 172 <param name="normalBam" value="sample1.bam" ftype="bam"/> | |
| 173 <param name="tumorBam" value="sample2.bam" ftype="bam"/> | |
| 174 <conditional name="ref_cond"> | |
| 175 <param name="ref_sel" value="history"/> | |
| 176 <param name="ref" value="hg98.fa" ftype="fasta"/> | |
| 177 </conditional> | |
| 178 <param name="optimization" value="--exome" /> | |
| 179 <section name="oo"> | |
| 180 <param name="vcf_type" value="decompressed"/> | |
| 181 <param name="outputCallableRegions" value="true"/> | |
| 182 </section> | |
| 183 <section name="strelka"> | |
| 184 <param name="depthFilterMultiple" value="2.8"/> | |
| 185 <param name="snvMaxFilteredBasecallFrac" value="0.5"/> | |
| 186 <param name="snvMaxSpanningDeletionFrac" value="0.76"/> | |
| 187 <param name="indelMaxWindowFilteredBasecallFrac" value="0.4"/> | |
| 188 <param name="ssnvPrior" value="0.0002"/> | |
| 189 <param name="sindelPrior" value="0.000002"/> | |
| 190 <param name="ssnvNoise" value="0.0000000004"/> | |
| 191 <param name="sindelNoiseFactor" value="2.1"/> | |
| 192 <param name="ssnvNoiseStrandBiasFrac" value="0.1"/> | |
| 193 <param name="minTier1Mapq" value="21"/> | |
| 194 <param name="minTier2Mapq" value="1"/> | |
| 195 <param name="ssnvQuality_LowerBound" value="14"/> | |
| 196 <param name="sindelQuality_LowerBound" value="41"/> | |
| 197 <param name="ssnvContamTolerance" value="0.16"/> | |
| 198 <param name="indelContamTolerance" value="0.16"/> | |
| 199 <param name="maxIndelSize" value="50"/> | |
| 200 </section> | |
| 201 <output name="out_indels" ftype="vcf"> | |
| 202 <assert_contents> | |
| 203 <has_n_lines n="39"/> | |
| 204 <has_line_matching expression="#CHROM	POS	.+"/> | |
| 205 <has_line_matching expression="demo20	3664	.+"/> | |
| 206 </assert_contents> | |
| 207 </output> | |
| 208 <output name="out_snvs" ftype="vcf"> | |
| 209 <assert_contents> | |
| 210 <has_n_lines n="51"/> | |
| 211 <has_line_matching expression="#CHROM	POS	.+"/> | |
| 212 <has_line_matching expression="demo20	3537	.+"/> | |
| 213 </assert_contents> | |
| 214 </output> | |
| 215 <output name="out_callable" ftype="bed"> | |
| 216 <assert_contents> | |
| 217 <has_n_lines n="136"/> | |
| 218 <has_line_matching expression="demo20	3971	.+"/> | |
| 219 </assert_contents> | |
| 220 </output> | |
| 221 </test> | |
| 222 <!-- #4; bam, reference cached --> | |
| 223 <test expect_num_outputs="2"> | |
| 224 <param name="normalBam" dbkey="hg19" value="sample1.bam" ftype="bam"/> | |
| 225 <param name="tumorBam" value="sample2.bam" ftype="bam"/> | |
| 226 <conditional name="ref_cond"> | |
| 227 <param name="ref_sel" value="cached"/> | |
| 228 <param name="ref" value="hg19"/> | |
| 229 </conditional> | |
| 230 <section name="oo"> | |
| 231 <param name="vcf_type" value="decompressed"/> | |
| 232 </section> | |
| 233 <output name="out_indels" ftype="vcf"> | |
| 234 <assert_contents> | |
| 235 <has_n_lines n="41"/> | |
| 236 <has_line_matching expression="#CHROM	POS	.+"/> | |
| 237 <has_line_matching expression="demo20	3664	.+"/> | |
| 238 </assert_contents> | |
| 239 </output> | |
| 240 <output name="out_snvs" ftype="vcf"> | |
| 241 <assert_contents> | |
| 242 <has_n_lines n="52"/> | |
| 243 <has_line_matching expression="#CHROM	POS	.+"/> | |
| 244 <has_line_matching expression="demo20	3537	.+"/> | |
| 245 </assert_contents> | |
| 246 </output> | |
| 247 </test> | |
| 248 </tests> | |
| 249 <help><![CDATA[ | |
| 250 .. class:: infomark | |
| 251 | |
| 252 **What it does** | |
| 253 | |
| 254 @HELP_STRELKA@ | |
| 255 | |
| 256 The somatic calling model improves on the original Strelka method for liquid and late-stage tumor analysis by accounting for possible tumor cell contamination in the normal sample. A final empirical variant re-scoring step using random forest models trained on various call quality features has been added to both callers (germline and somatic) to further improve precision. | |
| 257 | |
| 258 **Input** | |
| 259 | |
| 260 @HELP_INPUT@ | |
| 261 | |
| 262 **Output** | |
| 263 | |
| 264 *Indels* | |
| 265 | |
| 266 All somatic indels inferred in the tumor sample in VCF format. | |
| 267 | |
| 268 *SNVs* | |
| 269 | |
| 270 All somatic SNVs inferred in the tumor sample in VCF format. | |
| 271 | |
| 272 *Callability* | |
| 273 | |
| 274 The somatic variant caller can be configured with the option --outputCallableRegions, which will extend the somatic SNV quality model calculation to be applied as a test of somatic SNV callability at all positions in the genome. The outcome of this callability calculation will be summarized in a BED-formatted callability track. This BED track contains regions which are determined to be callable, indicating that there is sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater. Both somatic and non-somatic sites are determined to be 'callable' if the somatic or non-somatic quality threshold is at least 15. | |
| 275 | |
| 276 .. class:: infomark | |
| 277 | |
| 278 **References** | |
| 279 | |
| 280 @HELP_REFERENCES@ | |
| 281 ]]></help> | |
| 282 <expand macro="citations"/> | |
| 283 </tool> |
