Mercurial > repos > elixir-it > mutect2
comparison mutect2.xml @ 0:0cc081cd3992 draft
Uploaded
| author | elixir-it |
|---|---|
| date | Thu, 28 Jun 2018 05:58:45 -0400 |
| parents | |
| children | 2ebf2cd4f18f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0cc081cd3992 |
|---|---|
| 1 <tool id="mutect2" name="MuTect2" version="3.8"> | |
| 2 <description>somatic SNP and indel caller</description> | |
| 3 <macros> | |
| 4 <import>mutect2_macros_add_loc.xml</import> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <requirement type="package" version="3.8" >gatk</requirement> | |
| 8 <requirement type="package" version="2.7.1" >picard</requirement> | |
| 9 <requirement type="package" version="1.7" >samtools</requirement> | |
| 10 </requirements> | |
| 11 <command> | |
| 12 <![CDATA[ | |
| 13 ## Stage the BAMs under fixed names first: mutect2 wants the .bam extension on its inputs | |
| 14 ln -s $input1 tumor.bam && | |
| 15 ln -s $input2 normal.bam && | |
| 16 | |
| 17 ## Index the links so each .bai is written beside them in the job directory; -@ allocates additional threads | |
| 18 samtools index -@ \${GALAXY_SLOTS:-4} tumor.bam && | |
| 19 samtools index -@ \${GALAXY_SLOTS:-4} normal.bam && | |
| 20 ## Every staging command must end with && because the rendered lines are joined into one shell command | |
| 21 ## NOTE(review): reference_source and reference come from the imported macro file - confirm these names match it | |
| 22 #if $reference_source == "history" | |
| 23 ln -s $reference genome.fa && | |
| 24 ln -s $reference".fai" genome.fa.fai && | |
| 25 #end if | |
| 26 #if $list | |
| 27 ln -s $list position.bed && | |
| 28 #end if | |
| 29 #if $dbSNP | |
| 30 ln -s $dbSNP dbSNP.vcf && | |
| 31 #end if | |
| 32 #if $cosmic | |
| 33 ln -s $cosmic cosmic.vcf && | |
| 34 #end if | |
| 35 #if $alleles | |
| 36 ln -s $alleles alleles.vcf && | |
| 37 #end if | |
| 38 | |
| 39 ## Create the .dict file of the genome, required by mutect2, when the reference comes from the history | |
| 40 #if $reference_source == "history" | |
| 41 java -jar \$CONDA_DEFAULT_ENV/share/picard-2.7.1-2/picard.jar CreateSequenceDictionary R= genome.fa O= genome.dict 2>$log && | |
| 42 #end if | |
| 43 | |
| 44 ## gatk3-register takes the GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 archive, unpacks it | |
| 45 ## and moves the .jar file to \$CONDA_DEFAULT_ENV/opt/gatk-3.8/; the mutect2 command is run afterwards | |
| 46 gatk3-register \$_CONDA_DIR/../GenomeAnalysisTK-3.8-0-ge9d806836.tar.bz2 2>$log ; | |
| 47 java -jar \$CONDA_DEFAULT_ENV/opt/gatk-3.8/GenomeAnalysisTK.jar -nct \${GALAXY_SLOTS:-4} -T MuTect2 -I:tumor tumor.bam -I:normal normal.bam -o $output | |
| 48 #if $reference_source == "history" | |
| 49 -R genome.fa | |
| 50 #else | |
| 51 -R $reference_source.ref_file.fields.path | |
| 52 #end if | |
| 53 ## TODO advanced inputs section if the optional inputs are present their options are added to the command | |
| 54 #if $dbSNP | |
| 55 --dbsnp dbSNP.vcf | |
| 56 #end if | |
| 57 #if $cosmic | |
| 58 --cosmic cosmic.vcf | |
| 59 #end if | |
| 60 #if $list | |
| 61 -L position.bed | |
| 62 #end if | |
| 63 #if $alleles | |
| 64 --alleles alleles.vcf | |
| 65 #end if | |
| 66 | |
| 67 ## Advanced options section: an option is added to the command only when its value differs from the default | |
| 68 ## All of these parameters are declared inside the advanced conditional, so each one is read through the advanced. prefix | |
| 69 #if str($advanced.advanced_parameters) =="show": | |
| 70 #if $advanced.heterozygosity != "0.001" | |
| 71 --heterozygosity $advanced.heterozygosity | |
| 72 #end if | |
| 73 #if $advanced.heterozygosity_stdev != "0.01" | |
| 74 --heterozygosity_stdev $advanced.heterozygosity_stdev | |
| 75 #end if | |
| 76 #if $advanced.indel_heterozygosity != "1.25E-4" | |
| 77 --indel_heterozygosity $advanced.indel_heterozygosity | |
| 78 #end if | |
| 79 #if $advanced.initial_normal_lod != "0.5" | |
| 80 --initial_normal_lod $advanced.initial_normal_lod | |
| 81 #end if | |
| 82 #if $advanced.initial_tumor_lod != "4.0" | |
| 83 --initial_tumor_lod $advanced.initial_tumor_lod | |
| 84 #end if | |
| 85 #if $advanced.max_alt_allele_in_normal_fraction != "0.03" | |
| 86 --max_alt_allele_in_normal_fraction $advanced.max_alt_allele_in_normal_fraction | |
| 87 #end if | |
| 88 #if $advanced.max_alt_alleles_in_normal_count != "1" | |
| 89 --max_alt_alleles_in_normal_count $advanced.max_alt_alleles_in_normal_count | |
| 90 #end if | |
| 91 #if $advanced.max_alt_alleles_in_normal_qscore_sum != "20" | |
| 92 --max_alt_alleles_in_normal_qscore_sum $advanced.max_alt_alleles_in_normal_qscore_sum | |
| 93 #end if | |
| 94 #if $advanced.maxReadsInRegionPerSample != "1000" | |
| 95 --maxReadsInRegionPerSample $advanced.maxReadsInRegionPerSample | |
| 96 #end if | |
| 97 #if $advanced.min_base_quality_score != "10" | |
| 98 --min_base_quality_score $advanced.min_base_quality_score | |
| 99 #end if | |
| 100 #if $advanced.minReadsPerAlignmentStart != "5" | |
| 101 --minReadsPerAlignmentStart $advanced.minReadsPerAlignmentStart | |
| 102 #end if | |
| 103 #if $advanced.normal_lod != "2.2" | |
| 104 --normal_lod $advanced.normal_lod | |
| 105 #end if | |
| 106 #if $advanced.pir_mad_threshold != "3.0" | |
| 107 --pir_mad_threshold $advanced.pir_mad_threshold | |
| 108 #end if | |
| 109 #if $advanced.pir_median_threshold != "10.0" | |
| 110 --pir_median_threshold $advanced.pir_median_threshold | |
| 111 #end if | |
| 112 #if $advanced.power_constant_qscore != "30" | |
| 113 --power_constant_qscore $advanced.power_constant_qscore | |
| 114 #end if | |
| 115 #if $advanced.sample_ploidy != "2" | |
| 116 --sample_ploidy $advanced.sample_ploidy | |
| 117 #end if | |
| 118 #if $advanced.standard_min_confidence_threshold_for_calling != "10.0" | |
| 119 --standard_min_confidence_threshold_for_calling $advanced.standard_min_confidence_threshold_for_calling | |
| 120 #end if | |
| 121 #if $advanced.tumor_lod != "6.3" | |
| 122 --tumor_lod $advanced.tumor_lod | |
| 123 #end if | |
| 124 #if $advanced.contamination_fraction_to_filter != "0.0" | |
| 125 --contamination_fraction_to_filter $advanced.contamination_fraction_to_filter | |
| 126 #end if | |
| 127 #if $advanced.dbsnp_normal_lod != "5.5" | |
| 128 --dbsnp_normal_lod $advanced.dbsnp_normal_lod | |
| 129 #end if | |
| 130 #if $advanced.debug_read_name != "" | |
| 131 --debug_read_name $advanced.debug_read_name | |
| 132 #end if | |
| 133 #if $advanced.genotyping_mode != "DISCOVERY" | |
| 134 --genotyping_mode $advanced.genotyping_mode | |
| 135 #end if | |
| 136 #if $advanced.group | |
| 137 --group $advanced.group | |
| 138 #end if | |
| 139 #end if | |
| 140 | |
| 141 ##TODO output section --> if the option string == "yes" the optional output is added | |
| 142 #if str($optional_out1.outFile1) =="yes" | |
| 143 --activeRegionOut $activeRegionOut_output | |
| 144 #end if | |
| 145 #if str($optional_out2.outFile2) =="yes" | |
| 146 --activityProfileOut $activityProfileOut_output | |
| 147 #end if | |
| 148 #if str($optional_out3.outFile3) =="yes" | |
| 149 --graphOutput $graphOutput_output | |
| 150 #end if | |
| 151 #if str($optional_out4.outFile4) =="yes" | |
| 152 --bamOutput $bamOutput_output | |
| 153 #end if | |
| 154 ##TODO the standard error is redirected to the log file | |
| 155 2> $log | |
| 156 ]]></command> | |
| 157 <inputs> | |
| 158 <expand macro="reference_loc"/> | |
| 159 <param format="bam" name="input1" type="data" label="tumor bam" help="bamfile"/> | |
| 160 <param format="bam" name="input2" type="data" label="normal bam" help="bamfile"/> | |
| 161 <param format="vcf" name="dbSNP" type="data" optional="true" label="dbsnp file.vcf" help="vcf file"/> | |
| 162 <param format="vcf" name="cosmic" type="data" optional="true" label="cosmic file.vcf" help="vcf file"/> | |
| 163 <param format="bed" name="list" type="data" optional="true" label="position list" help="bed file"/> | |
| 164 <param format="vcf" name="alleles" type="data" optional="true" label="set of alleles use in genotyping" help="vcf file"/> | |
| 165 <conditional name="advanced"> | |
| 166 <param name="advanced_parameters" type="select" label="advanced_parameters"> | |
| 167 <option value="hide" selected="true">Hide</option> | |
| 168 <option value="show">Show</option> | |
| 169 </param> | |
| 170 <when value="hide"/> | |
| 171 <when value="show"> | |
| 172 <param name="heterozygosity" type="float" optional="true" value="0.001" help="Heterozygosity value used to compute prior likelihoods for any locus" /> | |
| 173 <param name="heterozygosity_stdev" type="float" optional="true" value="0.01" help="Standard deviation of heterozygosity for SNP and indel calling"/> | |
| 174 <param name="indel_heterozygosity" type="text" value="1.25E-4" optional="true" help="Heterozygosity for indel calling" /> | |
| 175 <param name="initial_normal_lod" type="float" optional="true" value="0.5" help="Initial LOD threshold for calling normal variant" /> | |
| 176 <param name="initial_tumor_lod" type="float" optional="true" value="4.0" help="Initial LOD threshold for calling tumor variant" /> | |
| 177 <param name="max_alt_allele_in_normal_fraction" type="float" optional="true" value="0.03" help="Threshold for maximum alternate allele fraction in normal" /> | |
| 178 <param name="max_alt_alleles_in_normal_count" type="text" optional="true" value="1" help="Threshold for maximum alternate allele counts in normal" /> | |
| 179 <param name="max_alt_alleles_in_normal_qscore_sum" type="text" optional="true" value="20" help="Threshold for maximum alternate allele quality score sum in normal" /> | |
| 180 <param name="maxReadsInRegionPerSample" type="text" optional="true" value="1000" help="Maximum reads in an active region" /> | |
| 181 <param name="min_base_quality_score" type="text" size="2" optional="true" value="10" help="Minimum base quality required to consider a base for calling" /> | |
| 182 <param name="minReadsPerAlignmentStart" type="text" optional="true" value="5" help="Minimum number of reads sharing the same alignment start for each genomic location in an active region" /> | |
| 183 <param name="normal_lod" type="float" optional="true" value="2.2" help="LOD threshold for calling normal non-germline" /> | |
| 184 <param name="pir_mad_threshold" type="float" optional="true" value="3.0" help="threshold for clustered read position artifact MAD" /> | |
| 185 <param name="pir_median_threshold" type="float" optional="true" value="10.0" help="threshold for clustered read position artifact median" /> | |
| 186 <param name="power_constant_qscore" type="text" optional="true" value="30" help="Phred scale quality score constant to use in power calculations" /> | |
| 187 <param name="sample_ploidy" type="text" optional="true" value="2" help="ploidy per sample" /> | |
| 188 <param name="standard_min_confidence_threshold_for_calling" type="float" optional="true" value="10.0" help="The minimum phred-scaled confidence threshold at which variants should be called" /> | |
| 189 <param name="tumor_lod" type="float" optional="true" value="6.3" help="LOD threshold for calling tumor variant" /> | |
| 190 <param name="contamination_fraction_to_filter" type="float" optional="true" value="0.0" help="Fraction of contamination to aggressively remove" /> | |
| 191 <param name="dbsnp_normal_lod" type="float" optional="true" value="5.5" help="LOD threshold for calling normal non-variant at dbsnp sites" /> | |
| 192 <param name="debug_read_name" type="text" optional="true" value="" help="trace this read name through the calling process" /> | |
| 193 <param name="genotyping_mode" type="select" optional="true" help="Specifies how to determine the alternate alleles to use for genotyping" > | |
| 194 <option value="DISCOVERY" selected="true">DISCOVERY</option> | |
| 195 <option value="GENOTYPE_GIVEN_ALLELES">GENOTYPE_GIVEN_ALLELES</option> | |
| 196 </param> | |
| 197 <param name="group" type="text" optional="true" help="one or more classes, groups of annotation to apply to variant call" /> | |
| 198 </when> | |
| 199 </conditional> | |
| 200 <conditional name="optional_out1"> | |
| 201 <param name="outFile1" type="select" label="activeRegionOut"> | |
| 202 <option value="no" selected="true">no</option> | |
| 203 <option value="yes">yes</option> | |
| 204 </param> | |
| 205 <when value="no"/> | |
| 206 <when value="yes"/> | |
| 207 </conditional> | |
| 208 <conditional name="optional_out2"> | |
| 209 <param name="outFile2" type="select" label="activityprofileOut"> | |
| 210 <option value="no" selected="true">no</option> | |
| 211 <option value="yes">yes</option> | |
| 212 </param> | |
| 213 <when value="no"/> | |
| 214 <when value="yes"/> | |
| 215 </conditional> | |
| 216 <conditional name="optional_out3"> | |
| 217 <param name="outFile3" type="select" label="graphOutput"> | |
| 218 <option value="no" selected="true">no</option> | |
| 219 <option value="yes">yes</option> | |
| 220 </param> | |
| 221 <when value="no"/> | |
| 222 <when value="yes"/> | |
| 223 </conditional> | |
| 224 <conditional name="optional_out4"> | |
| 225 <param name="outFile4" type="select" label="Bamoutput"> | |
| 226 <option value="no" selected="true">no</option> | |
| 227 <option value="yes">yes</option> | |
| 228 </param> | |
| 229 <when value="no"/> | |
| 230 <when value="yes"/> | |
| 231 </conditional> | |
| 232 </inputs> | |
| 233 <outputs> | |
| 234 <data format="vcf" name="output" label="${tool.name} on ${on_string}"/> | |
| 235 <data format="txt" name="log" label="${tool.name} on ${on_string} :log"/> | |
| 236 <data format="txt" name="activeRegionOut_output" optional="true" label="${tool.name} on ${on_string} :activeRegionOut"> | |
| 237 <filter>optional_out1['outFile1'] == 'yes'</filter> | |
| 238 </data> | |
| 239 <data format="txt" name="activityProfileOut_output" label="${tool.name} on ${on_string} :activityProfileOut"> | |
| 240 <filter>optional_out2['outFile2'] == 'yes'</filter> | |
| 241 </data> | |
| 242 <data format="txt" name="graphOutput_output" label="${tool.name} on ${on_string} :graphOutput"> | |
| 243 <filter>optional_out3['outFile3'] == 'yes'</filter> | |
| 244 </data> | |
| 245 <data format="bam" name="bamOutput_output" label="${tool.name} on ${on_string} :bamOutput"> <!-- the bamOutput argument writes a BAM file, not text --> | |
| 246 <filter>optional_out4['outFile4'] == 'yes'</filter> | |
| 247 </data> | |
| 248 </outputs> | |
| 249 <tests> | |
| 250 <test> | |
| 251 <conditional name="reference_source"> | |
| 252 <param name="reference_source_selector" value="history"/> | |
| 253 <param name="ref_file" value="test_fasta.fa"/> | |
| 254 </conditional> | |
| 255 <param name="input1" value="mutect2_test_tumoral2.bam" /> | |
| 256 <param name="input2" value="mutect2_test_normal2.bam" /> | |
| 257 </test> | |
| 258 </tests> | |
| 259 <help> | |
| 260 **IMPORTANT** Before the wrapper can run, the Galaxy administrator has to download GATK 3.8-0-ge9d806836 from the Broad Institute archive https://software.broadinstitute.org/gatk/download/archive and move it into the conda_prefix folder. | |
| 261 The path of the conda_prefix folder is set in the galaxy.ini (or galaxy.yml) file. | |
| 262 | |
| 263 MuTect2 is a somatic SNP and indel caller that combines the DREAM challenge-winning somatic genotyping engine of the original MuTect (Cibulskis et al., 2013) with the assembly-based machinery of HaplotypeCaller. | |
| 264 Galaxy wrapper for MuTect2 implements most but not all options available through the command line. Supported options are described below. | |
| 265 | |
| 266 **Optional Inputs** | |
| 267 | |
| 268 + --alleles none Set of alleles to use in genotyping | |
| 269 + --cosmic [] VCF file of COSMIC sites | |
| 270 + --dbsnp none dbSNP file | |
| 271 + --activityProfileOut NA Output the raw activity profile results in IGV format | |
| 272 + --graphOutput NA Write debug assembly graph information to this file | |
| 273 | |
| 274 **Optional Parameters** | |
| 275 | |
| 276 + --contamination_fraction_to_filter 0.0 Fraction of contamination to aggressively remove | |
| 277 + --dbsnp_normal_lod 5.5 LOD threshold for calling normal non-variant at dbsnp sites | |
| 278 + --debug_read_name NA trace this read name through the calling process | |
| 279 + --genotyping_mode DISCOVERY Specifies how to determine the alternate alleles to use for genotyping | |
| 280 + --group [] One or more classes/groups of annotations to apply to variant calls | |
| 281 + --heterozygosity 0.001 Heterozygosity value used to compute prior likelihoods for any locus | |
| 282 + --heterozygosity_stdev 0.01 Standard deviation of heterozygosity for SNP and indel calling | |
| 283 + --indel_heterozygosity 1.25E-4 Heterozygosity for indel calling | |
| 284 + --initial_normal_lod 0.5 Initial LOD threshold for calling normal variant | |
| 285 + --initial_tumor_lod 4.0 Initial LOD threshold for calling tumor variant | |
| 286 + --max_alt_allele_in_normal_fraction 0.03 Threshold for maximum alternate allele fraction in normal | |
| 287 + --max_alt_alleles_in_normal_count 1 Threshold for maximum alternate allele counts in normal | |
| 288 + --max_alt_alleles_in_normal_qscore_sum 20 Threshold for maximum alternate allele quality score sum in normal | |
| 289 + --maxReadsInRegionPerSample 1000 Maximum reads in an active region | |
| 290 + --min_base_quality_score 10 Minimum base quality required to consider a base for calling | |
| 291 + --minReadsPerAlignmentStart 5 Minimum number of reads sharing the same alignment start for each genomic location in an active region | |
| 292 + --normal_lod 2.2 LOD threshold for calling normal non-germline | |
| 293 + --pir_mad_threshold 3.0 threshold for clustered read position artifact MAD | |
| 294 + --pir_median_threshold 10.0 threshold for clustered read position artifact median | |
| 295 + --power_constant_qscore 30 Phred scale quality score constant to use in power calculations | |
| 296 + --sample_ploidy 2 Ploidy per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy). | |
| 297 + --standard_min_confidence_threshold_for_calling 10.0 The minimum phred-scaled confidence threshold at which variants should be called | |
| 298 + --tumor_lod 6.3 LOD threshold for calling tumor variant | |
| 299 | |
| 300 **Advanced Outputs** | |
| 301 | |
| 302 + --bamOutput | |
| 303 + --activeRegionOut | |
| 304 + --activityProfileOut | |
| 305 + --graphOutput | |
| 306 | |
| 307 more information at https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_cancer_m2_MuTect2.php | |
| 308 </help> | |
| 309 <citations> | |
| 310 <citation type="doi">10.1038/nbt.2514</citation> | |
| 311 </citations> | |
| 312 </tool> |
