Mercurial > repos > iuc > strelka_somatic
diff strelka_somatic.xml @ 0:c06e033242df draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
| author | iuc |
|---|---|
| date | Wed, 27 Jan 2021 14:46:27 +0000 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka_somatic.xml Wed Jan 27 14:46:27 2021 +0000 @@ -0,0 +1,283 @@ +<?xml version="1.0"?> +<tool id="strelka_somatic" name="Strelka Somatic" version="@TOOL_VERSION@+@GALAXY_VERSION@"> + <description>@DESCRIPTION@ for somatic variation in tumor/normal sample pairs</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ + ## initialize + #if $normalBam.is_of_type('bam') + ln -s '$normalBam' './input_normal.bam' && + ln -s '$normalBam.metadata.bam_index' './input_normal.bam.bai' && + #elif $normalBam.is_of_type('cram') + ln -s '$normalBam' './input_normal.cram' && + ln -s '$normalBam.metadata.cram_index' './input_normal.cram.crai' && + #end if + #if $tumorBam.is_of_type('bam') + ln -s '$tumorBam' './input_tumor.bam' && + ln -s '$tumorBam.metadata.bam_index' './input_tumor.bam.bai' && + #elif $tumorBam.is_of_type('cram') + ln -s '$tumorBam' './input_tumor.cram' && + ln -s '$tumorBam.metadata.cram_index' './input_tumor.cram.crai' && + #end if + @INIT@ + + ## create workflow + configureStrelkaSomaticWorkflow.py + #if $normalBam.is_of_type('bam') + --normalBam ./input_normal.bam + #elif $normalBam.is_of_type('cram') + --normalBam ./input_normal.cram + #end if + #if $tumorBam.is_of_type('bam') + --tumorBam ./input_tumor.bam + #elif $tumorBam.is_of_type('cram') + --tumorBam ./input_tumor.cram + #end if + $oo.outputCallableRegions + @CREATE@ + + ## run workflow + @RUN@ + + ## decompress results if needed and move everything to final destinations + #if $oo.vcf_type == "decompressed" + && bgzip -d results/results/variants/somatic.indels.vcf.gz + && bgzip -d results/results/variants/somatic.snvs.vcf.gz + && mv results/results/variants/somatic.indels.vcf '$out_indels' + && mv results/results/variants/somatic.snvs.vcf '$out_snvs' + #else + && mv results/results/variants/somatic.indels.vcf.gz '$out_indels' + && mv results/results/variants/somatic.snvs.vcf.gz '$out_snvs' + #end if + #if $oo.outputCallableRegions + && bgzip -d results/results/regions/somatic.callable.regions.bed.gz + && mv results/results/regions/somatic.callable.regions.bed '$out_callable' + #end if + + ]]></command> + <configfiles> + <configfile name="config_file"> +## parser cannot handle indents +[StrelkaSomatic] +depthFilterMultiple = $strelka.depthFilterMultiple +snvMaxFilteredBasecallFrac = $strelka.snvMaxFilteredBasecallFrac +snvMaxSpanningDeletionFrac = $strelka.snvMaxSpanningDeletionFrac +indelMaxWindowFilteredBasecallFrac = $strelka.indelMaxWindowFilteredBasecallFrac +ssnvPrior = $strelka.ssnvPrior +sindelPrior = $strelka.sindelPrior +ssnvNoise = $strelka.ssnvNoise +sindelNoiseFactor = $strelka.sindelNoiseFactor +ssnvNoiseStrandBiasFrac = $strelka.ssnvNoiseStrandBiasFrac +minTier1Mapq = $strelka.minTier1Mapq +minTier2Mapq = $strelka.minTier2Mapq +ssnvQuality_LowerBound = $strelka.ssnvQuality_LowerBound +sindelQuality_LowerBound = $strelka.sindelQuality_LowerBound +ssnvContamTolerance = $strelka.ssnvContamTolerance +indelContamTolerance = $strelka.indelContamTolerance +@CONFIG@ + </configfile> + </configfiles> + <inputs> + <param argument="--normalBam" type="data" format="bam,cram" multiple="false" label="Select normal sample file" help="In bam or cram format."/> + <param argument="--tumorBam" type="data" format="bam,cram" multiple="false" label="Select tumor sample file" help="In bam or cram format."/> + <expand macro="input_required"/> + <expand macro="calling_model" /> + <expand macro="calling_model_expert" /> + <expand macro="regions_select" /> + + <section name="oo" title="Output options" expanded="false"> + <expand macro="input_output"/> + <param argument="--outputCallableRegions" type="boolean" checked="false" truevalue="--outputCallableRegions" falsevalue="" label="Generate bed file describing somatic callable regions of the genome" help=""/> + </section> + + <section name="strelka" title="Strelka run configuration" expanded="false"> + <expand macro="input_strelka"/> + <param argument="depthFilterMultiple" type="float" value="3.0" label="Set depthFilterMultiple" help="If the depth filter is not skipped, all variants which occur at a depth greater than depthFilterMultiple*chromosome mean depth will be filtered out."/> + <param argument="snvMaxFilteredBasecallFrac" type="float" value="0.4" min="0.0" max="1.0" label="Set snvMaxFilteredBasecallFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of basecalls have been removed by the mismatch density filter in either sample."/> + <param argument="snvMaxSpanningDeletionFrac" type="float" value="0.75" min="0.0" max="1.0" label="Set snvMaxSpanningDeletionFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of overlapping reads contain deletions which span the SNV call site."/> + <param argument="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" min="0.0" max="1.0" label="Set indelMaxWindowFilteredBasecallFrac" help="Somatic indel calls are filtered if greater than this fraction of basecalls in a window extending 50 bases to each side of an indel's call position have been removed by the mismatch density filter."/> + <param argument="ssnvPrior" type="float" value="0.0001" min="0.0" label="Set ssnvPrior" help="Prior probability of a somatic snv or indel."/> + <param argument="sindelPrior" type="float" value="0.000001" min="0.0" label="Set sindelPrior" help="Prior probability of a somatic snv or indel."/> + <param argument="ssnvNoise" type="float" value="0.0000000005" min="0.0" label="Set ssnvNoise" help="Probability of an snv or indel noise allele NB: in the calling model a noise allele is shared in tumor and normal samples, but occurs at any frequency."/> + <param argument="sindelNoiseFactor" type="float" value="2.2" label="Set sindelNoiseFactor" help="Somatic indel noise factor."/> + <param argument="ssnvNoiseStrandBiasFrac" type="float" value="0.0" min="0.0" max="1.0" label="Set ssnvNoiseStrandBiasFrac" help="Fraction of snv noise attributed to strand-bias. It is not recommended to change this setting. However, if it is essential to turn the strand bias penalization off, the following is recommended: Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, (1) set ssnvNoiseStrandBiasFrac = 0 (2) divide the current ssnvNoise value by 2."/> + <param argument="minTier1Mapq" type="integer" value="20" label="Set minTier1Mapq" help="Minimum MAPQ score for reads at tier1."/> + <param argument="minTier2Mapq" type="integer" value="0" label="Set minTier2Mapq" help="Minimum MAPQ score for reads at tier2."/> + <param argument="ssnvQuality_LowerBound" type="integer" value="15" label="Set ssnvQuality_LowerBound" help="Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are marked as filtered."/> + <param argument="sindelQuality_LowerBound" type="integer" value="40" label="Set sindelQuality_LowerBound" help="Somatic quality score (QSI_NT, NT=ref) below which somatic indels are marked as filtered."/> + <param argument="ssnvContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set ssnvContamTolerance" help="Tolerance of tumor contamination in the normal sample."/> + <param argument="indelContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set indelContamTolerance" help="Tolerance of tumor contamination in the normal sample."/> + </section> + </inputs> + <outputs> + <data name="out_indels" format="vcf" label="${tool.name} on ${on_string}, Indels, vcf"> + <change_format> + <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" /> + </change_format> + </data> + <data name="out_snvs" format="vcf" label="${tool.name} on ${on_string}, SNVs, vcf"> + <change_format> + <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" /> + </change_format> + </data> + <data name="out_callable" format="bed" label="${tool.name} on ${on_string}, Callable regions, bed"> + <filter>bool(oo['outputCallableRegions'])</filter> + </data> + </outputs> + <tests> + <!-- #1; input bam, decompressed --> + <test expect_num_outputs="2"> + <param name="normalBam" value="sample1.bam" ftype="bam"/> + <param name="tumorBam" value="sample2.bam" ftype="bam"/> + <conditional name="ref_cond"> + <param name="ref_sel" value="history"/> + <param name="ref" value="hg98.fa" ftype="fasta"/> + </conditional> + <section name="oo"> + <param name="vcf_type" value="decompressed"/> + </section> + <output name="out_indels" ftype="vcf"> + <assert_contents> + <has_n_lines n="41"/> + <has_line_matching expression="#CHROM	POS	.+"/> + <has_line_matching expression="demo20	3664	.+"/> + </assert_contents> + </output> + <output name="out_snvs" ftype="vcf"> + <assert_contents> + <has_n_lines n="52"/> + <has_line_matching expression="#CHROM	POS	.+"/> + <has_line_matching expression="demo20	3537	.+"/> + </assert_contents> + </output> + </test> + <!-- #2; input cram, compressed --> + <test expect_num_outputs="2"> + <param name="normalBam" value="sample1.cram" ftype="cram"/> + <param name="tumorBam" value="sample2.cram" ftype="cram"/> + <conditional name="ref_cond"> + <param name="ref_sel" value="history"/> + <param name="ref" value="hg98.fa" ftype="fasta"/> + </conditional> + <section name="oo"> + <param name="vcf_type" value="compressed"/> + </section> + <output name="out_indels" file="indels_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/> + <output name="out_snvs" file="snvs_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/> + </test> + <!-- #3; input bam, decompressed, no defaults --> + <test expect_num_outputs="3"> + <param name="normalBam" value="sample1.bam" ftype="bam"/> + <param name="tumorBam" value="sample2.bam" ftype="bam"/> + <conditional name="ref_cond"> + <param name="ref_sel" value="history"/> + <param name="ref" value="hg98.fa" ftype="fasta"/> + </conditional> + <param name="optimization" value="--exome" /> + <section name="oo"> + <param name="vcf_type" value="decompressed"/> + <param name="outputCallableRegions" value="true"/> + </section> + <section name="strelka"> + <param name="depthFilterMultiple" value="2.8"/> + <param name="snvMaxFilteredBasecallFrac" value="0.5"/> + <param name="snvMaxSpanningDeletionFrac" value="0.76"/> + <param name="indelMaxWindowFilteredBasecallFrac" value="0.4"/> + <param name="ssnvPrior" value="0.0002"/> + <param name="sindelPrior" value="0.000002"/> + <param name="ssnvNoise" value="0.0000000004"/> + <param name="sindelNoiseFactor" value="2.1"/> + <param name="ssnvNoiseStrandBiasFrac" value="0.1"/> + <param name="minTier1Mapq" value="21"/> + <param name="minTier2Mapq" value="1"/> + <param name="ssnvQuality_LowerBound" value="14"/> + <param name="sindelQuality_LowerBound" value="41"/> + <param name="ssnvContamTolerance" value="0.16"/> + <param name="indelContamTolerance" value="0.16"/> + <param name="maxIndelSize" value="50"/> + </section> + <output name="out_indels" ftype="vcf"> + <assert_contents> + <has_n_lines n="39"/> + <has_line_matching expression="#CHROM	POS	.+"/> + <has_line_matching expression="demo20	3664	.+"/> + </assert_contents> + </output> + <output name="out_snvs" ftype="vcf"> + <assert_contents> + <has_n_lines n="51"/> + <has_line_matching expression="#CHROM	POS	.+"/> + <has_line_matching expression="demo20	3537	.+"/> + </assert_contents> + </output> + <output name="out_callable" ftype="bed"> + <assert_contents> + <has_n_lines n="136"/> + <has_line_matching expression="demo20	3971	.+"/> + </assert_contents> + </output> + </test> + <!-- #4; bam, reference cached --> + <test expect_num_outputs="2"> + <param name="normalBam" dbkey="hg19" value="sample1.bam" ftype="bam"/> + <param name="tumorBam" value="sample2.bam" ftype="bam"/> + <conditional name="ref_cond"> + <param name="ref_sel" value="cached"/> + <param name="ref" value="hg19"/> + </conditional> + <section name="oo"> + <param name="vcf_type" value="decompressed"/> + </section> + <output name="out_indels" ftype="vcf"> + <assert_contents> + <has_n_lines n="41"/> + <has_line_matching expression="#CHROM	POS	.+"/> + <has_line_matching expression="demo20	3664	.+"/> + </assert_contents> + </output> + <output name="out_snvs" ftype="vcf"> + <assert_contents> + <has_n_lines n="52"/> + <has_line_matching expression="#CHROM	POS	.+"/> + <has_line_matching expression="demo20	3537	.+"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +@HELP_STRELKA@ + +The somatic calling model improves on the original Strelka method for liquid and late-stage tumor analysis by accounting for possible tumor cell contamination in the normal sample. A final empirical variant re-scoring step using random forest models trained on various call quality features has been added to both callers to further improve precision. + +**Input** + +@HELP_INPUT@ + +**Output** + +*INDEL* + +All somatic indels inferred in the tumor sample in VCF format. + +*SNVS* + +All somatic SNVs inferred in the tumor sample in VCF format. + +*Callability* + +The somatic variant caller can be configured with the option --outputCallableRegions, which will extend the somatic SNV quality model calculation to be applied as a test of somatic SNV callability at all positions in the genome. The outcome of this callability calculation will be summarized in a BED-formatted callability track. This BED track contains regions which are determined to be callable, indicating that there is sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater. Both somatic and non-somatic sites are determined to be 'callable' if the somatic or non-somatic quality threshold is at least 15. + +.. class:: infomark + +**References** + +@HELP_REFERENCES@ + ]]></help> + <expand macro="citations"/> +</tool>
