Mercurial > repos > iuc > strelka_somatic
view strelka_somatic.xml @ 0:c06e033242df draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
| author | iuc |
|---|---|
| date | Wed, 27 Jan 2021 14:46:27 +0000 |
| parents | |
| children |
line wrap: on
line source
<?xml version="1.0"?>
<!--
    Galaxy wrapper for the Strelka somatic workflow (tumor/normal sample pairs).

    Flow of the command template:
      1. symlink both alignment inputs (and their indexes) under fixed names,
      2. configure the workflow,
      3. run it,
      4. move (and optionally decompress) the result files to the Galaxy outputs.

    Several fragments (requirements, shared inputs, command/config/help snippets,
    citations) come from macros.xml, which is not part of this file.
-->
<tool id="strelka_somatic" name="Strelka Somatic" version="@TOOL_VERSION@+@GALAXY_VERSION@">
    <description>@DESCRIPTION@ for somatic variation in tumor/normal sample pairs</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Cheetah note: a double-hash comment runs to the end of its line and
        directives are line oriented, so every comment, directive and shell
        fragment below must stay on its own line.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## initialize
        ## link each input under a fixed name whose extension matches its datatype, index next to it
        #if $normalBam.is_of_type('bam')
            ln -s '$normalBam' './input_normal.bam' &&
            ln -s '$normalBam.metadata.bam_index' './input_normal.bam.bai' &&
        #elif $normalBam.is_of_type('cram')
            ln -s '$normalBam' './input_normal.cram' &&
            ln -s '$normalBam.metadata.cram_index' './input_normal.cram.crai' &&
        #end if
        #if $tumorBam.is_of_type('bam')
            ln -s '$tumorBam' './input_tumor.bam' &&
            ln -s '$tumorBam.metadata.bam_index' './input_tumor.bam.bai' &&
        #elif $tumorBam.is_of_type('cram')
            ln -s '$tumorBam' './input_tumor.cram' &&
            ln -s '$tumorBam.metadata.cram_index' './input_tumor.cram.crai' &&
        #end if
        @INIT@

        ## create workflow
        configureStrelkaSomaticWorkflow.py
            #if $normalBam.is_of_type('bam')
                --normalBam ./input_normal.bam
            #elif $normalBam.is_of_type('cram')
                --normalBam ./input_normal.cram
            #end if
            #if $tumorBam.is_of_type('bam')
                --tumorBam ./input_tumor.bam
            #elif $tumorBam.is_of_type('cram')
                --tumorBam ./input_tumor.cram
            #end if
            $oo.outputCallableRegions
            @CREATE@

        ## run workflow
        @RUN@

        ## decompress results if needed and move everything to final destinations
        #if $oo.vcf_type == "decompressed"
            && bgzip -d results/results/variants/somatic.indels.vcf.gz
            && bgzip -d results/results/variants/somatic.snvs.vcf.gz
            && mv results/results/variants/somatic.indels.vcf '$out_indels'
            && mv results/results/variants/somatic.snvs.vcf '$out_snvs'
        #else
            && mv results/results/variants/somatic.indels.vcf.gz '$out_indels'
            && mv results/results/variants/somatic.snvs.vcf.gz '$out_snvs'
        #end if
        ## the callable regions track is always delivered decompressed (output format is bed)
        #if $oo.outputCallableRegions
            && bgzip -d results/results/regions/somatic.callable.regions.bed.gz
            && mv results/results/regions/somatic.callable.regions.bed '$out_callable'
        #end if
    ]]></command>
    <!--
        INI file built from the "strelka" section parameters. The content is kept
        at column 0 on purpose: see the template comment about indents.
    -->
    <configfiles>
        <configfile name="config_file">
## parser cannot handle indents
[StrelkaSomatic]
depthFilterMultiple = $strelka.depthFilterMultiple
snvMaxFilteredBasecallFrac = $strelka.snvMaxFilteredBasecallFrac
snvMaxSpanningDeletionFrac = $strelka.snvMaxSpanningDeletionFrac
indelMaxWindowFilteredBasecallFrac = $strelka.indelMaxWindowFilteredBasecallFrac
ssnvPrior = $strelka.ssnvPrior
sindelPrior = $strelka.sindelPrior
ssnvNoise = $strelka.ssnvNoise
sindelNoiseFactor = $strelka.sindelNoiseFactor
ssnvNoiseStrandBiasFrac = $strelka.ssnvNoiseStrandBiasFrac
minTier1Mapq = $strelka.minTier1Mapq
minTier2Mapq = $strelka.minTier2Mapq
ssnvQuality_LowerBound = $strelka.ssnvQuality_LowerBound
sindelQuality_LowerBound = $strelka.sindelQuality_LowerBound
ssnvContamTolerance = $strelka.ssnvContamTolerance
indelContamTolerance = $strelka.indelContamTolerance
@CONFIG@
</configfile>
    </configfiles>
    <inputs>
        <!-- Alignment inputs; the command template branches on bam vs. cram. -->
        <param argument="--normalBam" type="data" format="bam,cram" multiple="false"
               label="Select normal sample file"
               help="In bam or cram format."/>
        <param argument="--tumorBam" type="data" format="bam,cram" multiple="false"
               label="Select tumor sample file"
               help="In bam or cram format."/>
        <!-- Shared inputs defined in macros.xml. -->
        <expand macro="input_required"/>
        <expand macro="calling_model" />
        <expand macro="calling_model_expert" />
        <expand macro="regions_select" />
        <section name="oo" title="Output options" expanded="false">
            <expand macro="input_output"/>
            <!-- The true value is passed verbatim to the configure script and also gates the out_callable output. -->
            <param argument="--outputCallableRegions" type="boolean" checked="false"
                   truevalue="--outputCallableRegions" falsevalue=""
                   label="Generate bed file describing somatic callable regions of the genome"
                   help=""/>
        </section>
        <!-- Every parameter in this section is written to the INI configfile above. -->
        <section name="strelka" title="Strelka run configuration" expanded="false">
            <expand macro="input_strelka"/>
            <param argument="depthFilterMultiple" type="float" value="3.0"
                   label="Set depthFilterMultiple"
                   help="If the depth filter is not skipped, all variants which occur at a depth greater than depthFilterMultiple*chromosome mean depth will be filtered out."/>
            <param argument="snvMaxFilteredBasecallFrac" type="float" value="0.4" min="0.0" max="1.0"
                   label="Set snvMaxFilteredBasecallFrac"
                   help="Somatic SNV calls are filtered at sites where greater than this fraction of basecalls have been removed by the mismatch density filter in either sample."/>
            <param argument="snvMaxSpanningDeletionFrac" type="float" value="0.75" min="0.0" max="1.0"
                   label="Set snvMaxSpanningDeletionFrac"
                   help="Somatic SNV calls are filtered at sites where greater than this fraction of overlapping reads contain deletions which span the SNV call site."/>
            <param argument="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" min="0.0" max="1.0"
                   label="Set indelMaxWindowFilteredBasecallFrac"
                   help="Somatic indel calls are filtered if greater than this fraction of basecalls in a window extending 50 bases to each side of an indel's call position have been removed by the mismatch density filter."/>
            <param argument="ssnvPrior" type="float" value="0.0001" min="0.0"
                   label="Set ssnvPrior"
                   help="Prior probability of a somatic SNV."/>
            <param argument="sindelPrior" type="float" value="0.000001" min="0.0"
                   label="Set sindelPrior"
                   help="Prior probability of a somatic indel."/>
            <param argument="ssnvNoise" type="float" value="0.0000000005" min="0.0"
                   label="Set ssnvNoise"
                   help="Probability of an snv or indel noise allele. NB: in the calling model a noise allele is shared in tumor and normal samples, but occurs at any frequency."/>
            <param argument="sindelNoiseFactor" type="float" value="2.2"
                   label="Set sindelNoiseFactor"
                   help="Somatic indel noise factor."/>
            <param argument="ssnvNoiseStrandBiasFrac" type="float" value="0.0" min="0.0" max="1.0"
                   label="Set ssnvNoiseStrandBiasFrac"
                   help="Fraction of snv noise attributed to strand-bias. It is not recommended to change this setting. However, if it is essential to turn the strand bias penalization off, the following is recommended: Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, (1) set ssnvNoiseStrandBiasFrac = 0 (2) divide the current ssnvNoise value by 2."/>
            <param argument="minTier1Mapq" type="integer" value="20"
                   label="Set minTier1Mapq"
                   help="Minimum MAPQ score for reads at tier1."/>
            <param argument="minTier2Mapq" type="integer" value="0"
                   label="Set minTier2Mapq"
                   help="Minimum MAPQ score for reads at tier2."/>
            <param argument="ssnvQuality_LowerBound" type="integer" value="15"
                   label="Set ssnvQuality_LowerBound"
                   help="Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are marked as filtered."/>
            <param argument="sindelQuality_LowerBound" type="integer" value="40"
                   label="Set sindelQuality_LowerBound"
                   help="Somatic quality score (QSI_NT, NT=ref) below which somatic indels are marked as filtered."/>
            <param argument="ssnvContamTolerance" type="float" value="0.15" min="0.0" max="1.0"
                   label="Set ssnvContamTolerance"
                   help="Tolerance of tumor contamination in the normal sample."/>
            <param argument="indelContamTolerance" type="float" value="0.15" min="0.0" max="1.0"
                   label="Set indelContamTolerance"
                   help="Tolerance of tumor contamination in the normal sample."/>
        </section>
    </inputs>
    <outputs>
        <!-- Both VCF outputs switch to vcf_bgzip when the compressed output type is selected. -->
        <data name="out_indels" format="vcf" label="${tool.name} on ${on_string}, Indels, vcf">
            <change_format>
                <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" />
            </change_format>
        </data>
        <data name="out_snvs" format="vcf" label="${tool.name} on ${on_string}, SNVs, vcf">
            <change_format>
                <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" />
            </change_format>
        </data>
        <!-- Only created when the callable regions option is enabled. -->
        <data name="out_callable" format="bed" label="${tool.name} on ${on_string}, Callable regions, bed">
            <filter>bool(oo['outputCallableRegions'])</filter>
        </data>
    </outputs>
    <!--
        Column separators in the line-matching expressions are written as the
        character reference &#009; because a literal TAB inside an attribute
        value is normalized to a space by the XML parser and would never match
        the tab-delimited output lines.
    -->
    <tests>
        <!-- #1; input bam, decompressed -->
        <test expect_num_outputs="2">
            <param name="normalBam" value="sample1.bam" ftype="bam"/>
            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="history"/>
                <param name="ref" value="hg98.fa" ftype="fasta"/>
            </conditional>
            <section name="oo">
                <param name="vcf_type" value="decompressed"/>
            </section>
            <output name="out_indels" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_snvs" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="52"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2; input cram, compressed -->
        <test expect_num_outputs="2">
            <param name="normalBam" value="sample1.cram" ftype="cram"/>
            <param name="tumorBam" value="sample2.cram" ftype="cram"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="history"/>
                <param name="ref" value="hg98.fa" ftype="fasta"/>
            </conditional>
            <section name="oo">
                <param name="vcf_type" value="compressed"/>
            </section>
            <output name="out_indels" file="indels_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/>
            <output name="out_snvs" file="snvs_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/>
        </test>
        <!-- #3; input bam, decompressed, no defaults -->
        <test expect_num_outputs="3">
            <param name="normalBam" value="sample1.bam" ftype="bam"/>
            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="history"/>
                <param name="ref" value="hg98.fa" ftype="fasta"/>
            </conditional>
            <param name="optimization" value="--exome" />
            <section name="oo">
                <param name="vcf_type" value="decompressed"/>
                <param name="outputCallableRegions" value="true"/>
            </section>
            <section name="strelka">
                <param name="depthFilterMultiple" value="2.8"/>
                <param name="snvMaxFilteredBasecallFrac" value="0.5"/>
                <param name="snvMaxSpanningDeletionFrac" value="0.76"/>
                <param name="indelMaxWindowFilteredBasecallFrac" value="0.4"/>
                <param name="ssnvPrior" value="0.0002"/>
                <param name="sindelPrior" value="0.000002"/>
                <param name="ssnvNoise" value="0.0000000004"/>
                <param name="sindelNoiseFactor" value="2.1"/>
                <param name="ssnvNoiseStrandBiasFrac" value="0.1"/>
                <param name="minTier1Mapq" value="21"/>
                <param name="minTier2Mapq" value="1"/>
                <param name="ssnvQuality_LowerBound" value="14"/>
                <param name="sindelQuality_LowerBound" value="41"/>
                <param name="ssnvContamTolerance" value="0.16"/>
                <param name="indelContamTolerance" value="0.16"/>
                <param name="maxIndelSize" value="50"/>
            </section>
            <output name="out_indels" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="39"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_snvs" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="51"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_callable" ftype="bed">
                <assert_contents>
                    <has_n_lines n="136"/>
                    <has_line_matching expression="demo20&#009;3971&#009;.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #4; bam, reference cached -->
        <test expect_num_outputs="2">
            <param name="normalBam" dbkey="hg19" value="sample1.bam" ftype="bam"/>
            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="cached"/>
                <param name="ref" value="hg19"/>
            </conditional>
            <section name="oo">
                <param name="vcf_type" value="decompressed"/>
            </section>
            <output name="out_indels" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_snvs" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="52"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@HELP_STRELKA@

The somatic calling model improves on the original Strelka method for liquid and late-stage tumor analysis by accounting for possible tumor cell contamination in the normal sample. A final empirical variant re-scoring step using random forest models trained on various call quality features has been added to both callers to further improve precision.

**Input**

@HELP_INPUT@

**Output**

*INDEL*

All somatic indels inferred in the tumor sample in VCF format.

*SNVS*

All somatic SNVs inferred in the tumor sample in VCF format.

*Callability*

The somatic variant caller can be configured with the option --outputCallableRegions, which will extend the somatic SNV quality model calculation to be applied as a test of somatic SNV callability at all positions in the genome. The outcome of this callability calculation will be summarized in a BED-formatted callability track. This BED track contains regions which are determined to be callable, indicating that there is sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater. Both somatic and non-somatic sites are determined to be 'callable' if the somatic or non-somatic quality threshold is at least 15.

.. class:: infomark

**References**

@HELP_REFERENCES@
    ]]></help>
    <expand macro="citations"/>
</tool>
