view strelka_somatic.xml @ 0:c06e033242df draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a"
author iuc
date Wed, 27 Jan 2021 14:46:27 +0000
parents
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="strelka_somatic" name="Strelka Somatic" version="@TOOL_VERSION@+@GALAXY_VERSION@">
    <description>@DESCRIPTION@ for somatic variation in tumor/normal sample pairs</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## initialize
        ## Stage the normal sample: link the dataset into the working directory under a
        ## fixed name whose extension matches its datatype, and link the index stored in
        ## the dataset metadata right next to it (presumably so the caller can locate the
        ## index by file name; confirm against the Strelka documentation).
        #if $normalBam.is_of_type('bam')
            ln -s '$normalBam' './input_normal.bam' &&
            ln -s '$normalBam.metadata.bam_index' './input_normal.bam.bai' &&
        #elif $normalBam.is_of_type('cram')
            ln -s '$normalBam' './input_normal.cram' &&
            ln -s '$normalBam.metadata.cram_index' './input_normal.cram.crai' &&
        #end if
        ## Same staging for the tumor sample.
        #if $tumorBam.is_of_type('bam')
            ln -s '$tumorBam' './input_tumor.bam' &&
            ln -s '$tumorBam.metadata.bam_index' './input_tumor.bam.bai' &&
        #elif $tumorBam.is_of_type('cram')
            ln -s '$tumorBam' './input_tumor.cram' &&
            ln -s '$tumorBam.metadata.cram_index' './input_tumor.cram.crai' &&
        #end if
        ## The INIT token below is replaced with text defined in macros.xml (not visible here).
        @INIT@

        ## create workflow
        ## The configure script is pointed at the links created above, not at the
        ## original dataset paths.
        configureStrelkaSomaticWorkflow.py
            #if $normalBam.is_of_type('bam')
                --normalBam ./input_normal.bam
            #elif $normalBam.is_of_type('cram')
                --normalBam ./input_normal.cram
            #end if
            #if $tumorBam.is_of_type('bam')
                --tumorBam ./input_tumor.bam
            #elif $tumorBam.is_of_type('cram')
                --tumorBam ./input_tumor.cram
            #end if
            ## Boolean parameter: renders its truevalue (the callable regions flag) when
            ## checked and an empty string otherwise.
            $oo.outputCallableRegions
            @CREATE@

        ## run workflow
        @RUN@

        ## decompress results if needed and move everything to final destinations
        ## NOTE(review): every step below starts with a leading "and" operator, so the
        ## RUN token expansion presumably ends without a trailing operator; confirm in macros.xml.
        #if $oo.vcf_type == "decompressed"
            && bgzip -d results/results/variants/somatic.indels.vcf.gz
            && bgzip -d results/results/variants/somatic.snvs.vcf.gz
            && mv results/results/variants/somatic.indels.vcf '$out_indels'
            && mv results/results/variants/somatic.snvs.vcf '$out_snvs'
        #else
            && mv results/results/variants/somatic.indels.vcf.gz '$out_indels'
            && mv results/results/variants/somatic.snvs.vcf.gz '$out_snvs'
        #end if
        ## The callable regions BED is always decompressed, independent of the vcf_type choice.
        #if $oo.outputCallableRegions
            && bgzip -d results/results/regions/somatic.callable.regions.bed.gz
            && mv results/results/regions/somatic.callable.regions.bed '$out_callable'
        #end if

    ]]></command>
    <configfiles>
        <configfile name="config_file">
## Renders the [StrelkaSomatic] ini section from the values of the "strelka" form section.
## Keep every line at column 0: the parser cannot handle indents (original author's note).
[StrelkaSomatic]
depthFilterMultiple = $strelka.depthFilterMultiple
snvMaxFilteredBasecallFrac = $strelka.snvMaxFilteredBasecallFrac
snvMaxSpanningDeletionFrac = $strelka.snvMaxSpanningDeletionFrac
indelMaxWindowFilteredBasecallFrac = $strelka.indelMaxWindowFilteredBasecallFrac
ssnvPrior = $strelka.ssnvPrior
sindelPrior = $strelka.sindelPrior
ssnvNoise = $strelka.ssnvNoise
sindelNoiseFactor = $strelka.sindelNoiseFactor
ssnvNoiseStrandBiasFrac = $strelka.ssnvNoiseStrandBiasFrac
minTier1Mapq = $strelka.minTier1Mapq
minTier2Mapq = $strelka.minTier2Mapq
ssnvQuality_LowerBound = $strelka.ssnvQuality_LowerBound
sindelQuality_LowerBound = $strelka.sindelQuality_LowerBound
ssnvContamTolerance = $strelka.ssnvContamTolerance
indelContamTolerance = $strelka.indelContamTolerance
@CONFIG@
        </configfile>
    </configfiles>
    <inputs>
        <!-- Normal/tumor alignment pair: each parameter takes exactly one dataset in BAM or CRAM format. -->
        <param argument="--normalBam" type="data" format="bam,cram" multiple="false" label="Select normal sample file" help="In bam or cram format."/>
        <param argument="--tumorBam" type="data" format="bam,cram" multiple="false" label="Select tumor sample file" help="In bam or cram format."/>
        <!-- The macros below come from macros.xml and are not visible in this file.
             NOTE(review): the tests set a "ref_cond" conditional and an "optimization"
             parameter, which are presumably declared by these macros; confirm in macros.xml. -->
        <expand macro="input_required"/>
        <expand macro="calling_model" />
        <expand macro="calling_model_expert" />
        <expand macro="regions_select" />

        <!-- Output options. The "vcf_type" parameter read by the command and by the
             output format switches is not declared here; presumably it comes from the
             input_output macro (confirm in macros.xml). -->
        <section name="oo" title="Output options" expanded="false">
            <expand macro="input_output"/>
            <!-- The truevalue is inserted verbatim into the configure command line; the same
                 parameter also gates the callable regions output and its decompression step. -->
            <param argument="--outputCallableRegions" type="boolean" checked="false" truevalue="--outputCallableRegions" falsevalue="" label="Generate bed file describing somatic callable regions of the genome" help=""/>
        </section>

        <!-- Strelka run configuration. Each parameter declared directly in this section is
             written under the same key into the [StrelkaSomatic] block of the config file.
             Help texts name the variant class (SNV or indel) each setting applies to, so
             paired parameters can be told apart in the form. -->
        <section name="strelka" title="Strelka run configuration" expanded="false">
            <expand macro="input_strelka"/>
            <param argument="depthFilterMultiple" type="float" value="3.0" label="Set depthFilterMultiple" help="If the depth filter is not skipped, all variants which occur at a depth greater than depthFilterMultiple*chromosome mean depth will be filtered out."/>
            <param argument="snvMaxFilteredBasecallFrac" type="float" value="0.4" min="0.0" max="1.0" label="Set snvMaxFilteredBasecallFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of basecalls have been removed by the mismatch density filter in either sample."/>
            <param argument="snvMaxSpanningDeletionFrac" type="float" value="0.75" min="0.0" max="1.0" label="Set snvMaxSpanningDeletionFrac" help="Somatic SNV calls are filtered at sites where greater than this fraction of overlapping reads contain deletions which span the SNV call site."/>
            <param argument="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" min="0.0" max="1.0" label="Set indelMaxWindowFilteredBasecallFrac" help="Somatic indel calls are filtered if greater than this fraction of basecalls in a window extending 50 bases to each side of an indel's call position have been removed by the mismatch density filter."/>
            <param argument="ssnvPrior" type="float" value="0.0001" min="0.0" label="Set ssnvPrior" help="Prior probability of a somatic SNV."/>
            <param argument="sindelPrior" type="float" value="0.000001" min="0.0" label="Set sindelPrior" help="Prior probability of a somatic indel."/>
            <param argument="ssnvNoise" type="float" value="0.0000000005" min="0.0" label="Set ssnvNoise" help="Probability of an SNV noise allele. NB: in the calling model a noise allele is shared in tumor and normal samples, but occurs at any frequency."/>
            <param argument="sindelNoiseFactor" type="float" value="2.2" label="Set sindelNoiseFactor" help="Somatic indel noise factor."/>
            <param argument="ssnvNoiseStrandBiasFrac" type="float" value="0.0" min="0.0" max="1.0" label="Set ssnvNoiseStrandBiasFrac" help="Fraction of snv noise attributed to strand-bias. It is not recommended to change this setting. However, if it is essential to turn the strand bias penalization off, the following is recommended: Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, (1) set ssnvNoiseStrandBiasFrac = 0 (2) divide the current ssnvNoise value by 2."/>
            <param argument="minTier1Mapq" type="integer" value="20" label="Set minTier1Mapq" help="Minimum MAPQ score for reads at tier1."/>
            <param argument="minTier2Mapq" type="integer" value="0" label="Set minTier2Mapq" help="Minimum MAPQ score for reads at tier2."/>
            <param argument="ssnvQuality_LowerBound" type="integer" value="15" label="Set ssnvQuality_LowerBound" help="Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are marked as filtered."/>
            <param argument="sindelQuality_LowerBound" type="integer" value="40" label="Set sindelQuality_LowerBound" help="Somatic quality score (QSI_NT, NT=ref) below which somatic indels are marked as filtered."/>
            <param argument="ssnvContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set ssnvContamTolerance" help="Tolerance of tumor contamination in the normal sample when calling somatic SNVs."/>
            <param argument="indelContamTolerance" type="float" value="0.15" min="0.0" max="1.0" label="Set indelContamTolerance" help="Tolerance of tumor contamination in the normal sample when calling somatic indels."/>
        </section>
    </inputs>
    <outputs>
        <!-- Somatic indel calls. Declared as plain VCF; the datatype is switched to
             vcf_bgzip when oo.vcf_type is "compressed", matching the command, which
             then moves the .vcf.gz file into place without decompressing it. -->
        <data name="out_indels" format="vcf" label="${tool.name} on ${on_string}, Indels, vcf">
            <change_format>
                <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" />
            </change_format>
        </data>
        <!-- Somatic SNV calls; same format switching as out_indels. -->
        <data name="out_snvs" format="vcf" label="${tool.name} on ${on_string}, SNVs, vcf">
            <change_format>
                <when input="oo.vcf_type" value="compressed" format="vcf_bgzip" />
            </change_format>
        </data>
        <!-- Callable regions track; only created when the outputCallableRegions
             checkbox in the "oo" section evaluates to true. -->
        <data name="out_callable" format="bed" label="${tool.name} on ${on_string}, Callable regions, bed">
            <filter>bool(oo['outputCallableRegions'])</filter>
        </data>
    </outputs>
    <tests>
        <!-- #1; input bam, decompressed:
             BAM normal/tumor pair, reference FASTA passed as a dataset (ref_sel = history),
             plain-text VCF output. Two outputs are expected; each is checked for its line
             count, the column header line and one known record. -->
        <test expect_num_outputs="2">
            <param name="normalBam" value="sample1.bam" ftype="bam"/>
            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="history"/>
                <param name="ref" value="hg98.fa" ftype="fasta"/>
            </conditional>
            <section name="oo">
                <param name="vcf_type" value="decompressed"/>
            </section>
            <output name="out_indels" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_snvs" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="52"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2; input cram, compressed:
             CRAM normal/tumor pair with the outputs kept compressed (vcf_bgzip). The
             results are compared to stored expected files with compare="sim_size"
             instead of content assertions. -->
        <test expect_num_outputs="2">
            <param name="normalBam" value="sample1.cram" ftype="cram"/>
            <param name="tumorBam" value="sample2.cram" ftype="cram"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="history"/>
                <param name="ref" value="hg98.fa" ftype="fasta"/>
            </conditional>
            <section name="oo">
                <param name="vcf_type" value="compressed"/>
            </section>
            <output name="out_indels" file="indels_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/>
            <output name="out_snvs" file="snvs_test2.vcf.gz" ftype="vcf_bgzip" compare="sim_size"/>
        </test>
        <!-- #3; input bam, decompressed, no defaults:
             BAM pair with the exome optimization selected, the callable regions output
             switched on (hence three expected outputs) and every parameter declared
             directly in the "strelka" section set to a value that differs from its
             default. maxIndelSize is declared outside this file (presumably by the
             input_strelka macro; confirm in macros.xml). -->
        <test expect_num_outputs="3">
            <param name="normalBam" value="sample1.bam" ftype="bam"/>
            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="history"/>
                <param name="ref" value="hg98.fa" ftype="fasta"/>
            </conditional>
            <param name="optimization" value="--exome" />
            <section name="oo">
                <param name="vcf_type" value="decompressed"/>
                <param name="outputCallableRegions" value="true"/>
            </section>
            <section name="strelka">
                <param name="depthFilterMultiple" value="2.8"/>
                <param name="snvMaxFilteredBasecallFrac" value="0.5"/>
                <param name="snvMaxSpanningDeletionFrac" value="0.76"/>
                <param name="indelMaxWindowFilteredBasecallFrac" value="0.4"/>
                <param name="ssnvPrior" value="0.0002"/>
                <param name="sindelPrior" value="0.000002"/>
                <param name="ssnvNoise" value="0.0000000004"/>
                <param name="sindelNoiseFactor" value="2.1"/>
                <param name="ssnvNoiseStrandBiasFrac" value="0.1"/>
                <param name="minTier1Mapq" value="21"/>
                <param name="minTier2Mapq" value="1"/>
                <param name="ssnvQuality_LowerBound" value="14"/>
                <param name="sindelQuality_LowerBound" value="41"/>
                <param name="ssnvContamTolerance" value="0.16"/>
                <param name="indelContamTolerance" value="0.16"/>
                <param name="maxIndelSize" value="50"/>
            </section>
            <output name="out_indels" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="39"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_snvs" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="51"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_callable" ftype="bed">
                <assert_contents>
                    <has_n_lines n="136"/>
                    <has_line_matching expression="demo20&#009;3971&#009;.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #4; bam, reference cached:
             Same BAM pair and assertions as test #1, but the reference is selected with
             ref_sel = cached (value hg19) instead of being passed as a FASTA dataset.
             Only the normal sample carries dbkey="hg19". -->
        <test expect_num_outputs="2">
            <param name="normalBam" dbkey="hg19" value="sample1.bam" ftype="bam"/>
            <param name="tumorBam" value="sample2.bam" ftype="bam"/>
            <conditional name="ref_cond">
                <param name="ref_sel" value="cached"/>
                <param name="ref" value="hg19"/>
            </conditional>
            <section name="oo">
                <param name="vcf_type" value="decompressed"/>
            </section>
            <output name="out_indels" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3664&#009;.+"/>
                </assert_contents>
            </output>
            <output name="out_snvs" ftype="vcf">
                <assert_contents>
                    <has_n_lines n="52"/>
                    <has_line_matching expression="#CHROM&#009;POS&#009;.+"/>
                    <has_line_matching expression="demo20&#009;3537&#009;.+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@HELP_STRELKA@

The somatic calling model improves on the original Strelka method for liquid and late-stage tumor analysis by accounting for possible tumor cell contamination in the normal sample. A final empirical variant re-scoring step using random forest models trained on various call quality features has been added to both Strelka callers (germline and somatic) to further improve precision.

**Input**

@HELP_INPUT@

**Output**

*Indels*

All somatic indels inferred in the tumor sample in VCF format.

*SNVs*

All somatic SNVs inferred in the tumor sample in VCF format.

*Callability*

The somatic variant caller can be configured with the option --outputCallableRegions, which will extend the somatic SNV quality model calculation to be applied as a test of somatic SNV callability at all positions in the genome. The outcome of this callability calculation will be summarized in a BED-formatted callability track. This BED track contains regions which are determined to be callable, indicating that there is sufficient evidence to either call a somatic SNV or assert the absence of a somatic SNV with a variant frequency of 10% or greater. Both somatic and non-somatic sites are determined to be 'callable' if the somatic or non-somatic quality threshold is at least 15.

.. class:: infomark

**References**

@HELP_REFERENCES@
    ]]></help>
    <expand macro="citations"/>
</tool>