| 0 | 1 <tool id="gatk_variant_eval" name="Eval Variants" version="0.0.8"> | 
|  | 2   <description></description> | 
|  | 3   <requirements> | 
|  | 4       <requirement type="package" version="1.4">gatk</requirement> | 
|  | 5   </requirements> | 
|  | 6   <macros> | 
|  | 7     <import>gatk_macros.xml</import> | 
|  | 8   </macros> | 
|  | 9   <command interpreter="python">gatk_wrapper.py | 
|  | 10    #from binascii import hexlify | 
|  | 11    --max_jvm_heap_fraction "1" | 
|  | 12    --stdout "${output_log}" | 
|  | 13    #for $var_count, $variant in enumerate( $reference_source.variants ): | 
|  | 14       -d "--eval:input_${var_count},%(file_type)s" "${variant.input_variant}" "${variant.input_variant.ext}" "input_variants_${var_count}" | 
|  | 15    #end for | 
|  | 16    -p 'java | 
|  | 17     -jar "\$JAVA_JAR_PATH/GenomeAnalysisTK.jar" | 
|  | 18     -T "VariantEval" | 
|  | 19     --out "${output_report}" | 
|  | 20     --num_threads \${GALAXY_SLOTS:-4} | 
|  | 21     -et "NO_ET" ##ET no phone home | 
|  | 22     ##-log "${output_log}" ##don't use this to log to file, instead directly capture stdout | 
|  | 23     #if str( $reference_source.reference_source_selector ) != "history": | 
|  | 24         -R "${reference_source.ref_file.fields.path}" ##only non-history references are passed here by path | 
|  | 25     #end if | 
|  | 26    ' | 
|  | 27     ##one comp binding per comparison ROD; its name is also passed as known_names when requested | 
|  | 28     #for $rod_binding in $comp_rod_bind: | 
|  | 29         -d "--comp:${rod_binding.comp_rod_name},%(file_type)s" "${rod_binding.comp_input_rod}" "${rod_binding.comp_input_rod.ext}" "input_comp_${rod_binding.comp_rod_name}" | 
|  | 30         #if str( $rod_binding.comp_known_names ): | 
|  | 31             -p '--known_names "${rod_binding.comp_rod_name}"' | 
|  | 32         #end if | 
|  | 33     #end for | 
|  | 34     ##optional dbSNP binding, emitted only when the selector is set_dbsnp | 
|  | 35     #if str( $dbsnp_rod_bind_type.dbsnp_rod_bind_type_selector ) == 'set_dbsnp': | 
|  | 36         -d "--dbsnp:${dbsnp_rod_bind_type.dbsnp_rod_name},%(file_type)s" "${dbsnp_rod_bind_type.dbsnp_input_rod}" "${dbsnp_rod_bind_type.dbsnp_input_rod.ext}" "input_dbsnp_${dbsnp_rod_bind_type.dbsnp_rod_name}" | 
|  | 37         #if str( $dbsnp_rod_bind_type.dbsnp_known_names ): | 
|  | 38             -p '--known_names "${dbsnp_rod_bind_type.dbsnp_rod_name}"' | 
|  | 39         #end if | 
|  | 40     #end if | 
|  | 41     ##NOTE(review): standard_gatk_options is presumably supplied by gatk_macros.xml; confirm | 
|  | 42     #include source=$standard_gatk_options# | 
|  | 43 | 
|  | 44 | 
|  | 45     ##start analysis specific options | 
|  | 46     #if str( $analysis_param_type.analysis_param_type_selector ) == "advanced": | 
|  | 47         #for $stratification in $analysis_param_type.stratifications: | 
|  | 48             #set $select_string = "--select_exps '%s' --select_names '%s'" % ( str( $stratification.select_exps ), str( $stratification.select_name )  ) | 
|  | 49             -o '${ hexlify( $select_string ) }' | 
|  | 50         #end for | 
|  | 51         -p ' | 
|  | 52         ##one sample flag per entry of the samples repeat | 
|  | 53         #for $sample in $analysis_param_type.samples: | 
|  | 54             --sample "${sample.sample}" | 
|  | 55         #end for | 
|  | 56 | 
|  | 57         #if str( $analysis_param_type.stratification_modules ) != "None": | 
|  | 58             #for $stratification_module in str( $analysis_param_type.stratification_modules).split( ',' ): | 
|  | 59                 --stratificationModule "${stratification_module}" | 
|  | 60             #end for | 
|  | 61         #end if | 
|  | 62 | 
|  | 63         ${analysis_param_type.do_not_use_all_standard_stratifications} | 
|  | 64 | 
|  | 65         #for $variant_type in $analysis_param_type.only_variants_of_type: | 
|  | 66             --onlyVariantsOfType "${variant_type.variant_type}" | 
|  | 67         #end for | 
|  | 68 | 
|  | 69         #if str( $analysis_param_type.eval_modules ) != "None": | 
|  | 70             #for $eval_module in str( $analysis_param_type.eval_modules).split( ',' ): | 
|  | 71                 --evalModule "${eval_module}" | 
|  | 72             #end for | 
|  | 73         #end if | 
|  | 74 | 
|  | 75         ${analysis_param_type.do_not_use_all_standard_modules} | 
|  | 76 | 
|  | 77         #if str( $analysis_param_type.num_samples ) != "0": | 
|  | 78             --numSamples "${analysis_param_type.num_samples}" | 
|  | 79         #end if | 
|  | 80 | 
|  | 81         --minPhaseQuality "${analysis_param_type.min_phase_quality}" | 
|  | 82 | 
|  | 83         #if str( $analysis_param_type.family ): | 
|  | 84             --family_structure "${analysis_param_type.family}" | 
|  | 85         #end if | 
|  | 86 | 
|  | 87         --mendelianViolationQualThreshold "${analysis_param_type.mendelian_violation_qual_threshold}" | 
|  | 88 | 
|  | 89         #if str( $analysis_param_type.ancestral_alignments ) != "None": | 
|  | 90             --ancestralAlignments "${analysis_param_type.ancestral_alignments}" | 
|  | 91         #end if | 
|  | 92         ' | 
|  | 93         #if str( $analysis_param_type.known_cnvs ) != "None": | 
|  | 94             -d "--knownCNVs" "${analysis_param_type.known_cnvs}" "${analysis_param_type.known_cnvs.ext}" "input_known_cnvs" | 
|  | 95         #end if | 
|  | 96 | 
|  | 97         #if str( $analysis_param_type.strat_intervals ) != "None": | 
|  | 98             -d "--stratIntervals" "${analysis_param_type.strat_intervals}" "${analysis_param_type.strat_intervals.ext}" "input_strat_intervals" | 
|  | 99         #end if | 
|  | 100     #end if | 
|  | 101   </command> | 
|  | 102   <inputs> | 
|  | 103 | 
|  | 104     <conditional name="reference_source"> | 
|  | 105       <expand macro="reference_source_selector_param" /> | 
|  | 106       <when value="cached"> | 
|  | 107         <repeat name="variants" title="Variant" min="1" help="-eval,--eval &lt;eval&gt;"> | 
|  | 108           <param name="input_variant" type="data" format="vcf" label="Input variant file" /> | 
|  | 109         </repeat> | 
|  | 110         <param name="ref_file" type="select" label="Using reference genome" help="-R,--reference_sequence &lt;reference_sequence&gt;"> | 
|  | 111           <options from_data_table="gatk_picard_indexes"> | 
|  | 112             <!-- Disabled dbkey filter, kept for reference: <filter type="data_meta" key="dbkey" ref="input_variant" column="dbkey"/> --> | 
|  | 113           </options> | 
|  | 114           <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | 
|  | 115         </param> | 
|  | 116       </when> | 
|  | 117       <when value="history"> <!-- FIX ME!!!! NOTE(review): the original marker does not say what needs fixing; the variants repeat below duplicates the one under "cached" --> | 
|  | 118         <repeat name="variants" title="Variant" min="1" help="-eval,--eval &lt;eval&gt;"> | 
|  | 119           <param name="input_variant" type="data" format="vcf" label="Input variant file" /> | 
|  | 120         </repeat> | 
|  | 121         <param name="ref_file" type="data" format="fasta" label="Using reference file" help="-R,--reference_sequence &lt;reference_sequence&gt;" /> | 
|  | 122       </when> | 
|  | 123     </conditional> | 
|  | 124 | 
|  | 125     <repeat name="comp_rod_bind" title="Binding for reference-ordered comparison data" help="-comp,--comp &lt;comp&gt;"> <!-- the command template emits one comp binding per entry, keyed by comp_rod_name --> | 
|  | 126       <param name="comp_input_rod" type="data" format="vcf" label="Comparison ROD file" /> | 
|  | 127       <param name="comp_rod_name" type="text" value="Unnamed" label="Comparison ROD Name"/> | 
|  | 128       <param name="comp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use Comparison ROD as known_names" help="-knownName,--known_names &lt;known_names&gt;"/> <!-- when checked, comp_rod_name is also passed as a known_names value --> | 
|  | 129     </repeat> | 
|  | 130 | 
|  | 131     <conditional name="dbsnp_rod_bind_type"> | 
|  | 132       <param name="dbsnp_rod_bind_type_selector" type="select" label="Provide a dbSNP reference-ordered data file" help="-D,--dbsnp &lt;dbsnp&gt;"> | 
|  | 133         <option value="set_dbsnp" selected="True">Set dbSNP</option> | 
|  | 134         <option value="exclude_dbsnp">Don't set dbSNP</option> | 
|  | 135       </param> | 
|  | 136       <when value="exclude_dbsnp"> | 
|  | 137         <!-- No parameters here: the command template emits no dbSNP arguments for this choice --> | 
|  | 138       </when> | 
|  | 139       <when value="set_dbsnp"> | 
|  | 140         <param name="dbsnp_input_rod" type="data" format="vcf" label="dbSNP ROD file" /> | 
|  | 141         <param name="dbsnp_rod_name" type="hidden" value="dbsnp" label="dbSNP ROD Name"/> <!-- hidden parameter with the fixed value "dbsnp"; used as the binding name in the command template --> | 
|  | 142         <param name="dbsnp_known_names" type="boolean" truevalue="--known_names" falsevalue="" label="Use dbSNP ROD as known_names" help="-knownName,--known_names &lt;known_names&gt;" /> | 
|  | 143       </when> | 
|  | 144     </conditional> | 
|  | 145 | 
|  | 146     <expand macro="gatk_param_type_conditional" /> | 
|  | 147 | 
|  | 148 | 
|  | 149     <expand macro="analysis_type_conditional"> | 
|  | 150         <repeat name="stratifications" title="Stratification"> | 
|  | 151           <param name="select_exps" value="" type="text" label="Stratification Expression" help="-select,--select_exps &lt;select_exps&gt;"> | 
|  | 152             <sanitizer> | 
|  | 153               <valid initial="string.printable"> | 
|  | 154                <remove value="'"/> | 
|  | 155              </valid> | 
|  | 156               <mapping initial="none"/> | 
|  | 157             </sanitizer> | 
|  | 158           </param> | 
|  | 159           <param name="select_name" value="" type="text" label="Name" help="-selectName,--select_names &lt;select_names&gt;"/> | 
|  | 160         </repeat> | 
|  | 161 | 
|  | 162         <repeat name="samples" title="Sample" help="-sn,--sample &lt;sample&gt;"> | 
|  | 163           <param name="sample" value="" type="text" label="Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context"/> | 
|  | 164         </repeat> | 
|  | 165 | 
|  | 166         <param name="stratification_modules" type="select" multiple="True" display="checkboxes" label="Stratification modules to apply to the eval track(s)" help="-ST,--stratificationModule &lt;stratificationModule&gt;" > | 
|  | 167           <!-- do these need individual options also? gatk wiki has little info --> | 
|  | 168           <option value="AlleleFrequency" /> | 
|  | 169           <option value="AlleleCount" /> | 
|  | 170           <option value="CompRod" /> | 
|  | 171           <option value="Contig" /> | 
|  | 172           <option value="CpG" /> | 
|  | 173           <option value="Degeneracy" /> | 
|  | 174           <option value="EvalRod" /> | 
|  | 175           <option value="Filter" /> | 
|  | 176           <option value="FunctionalClass" /> | 
|  | 177           <option value="JexlExpression" /> | 
|  | 178           <option value="Sample" /> | 
|  | 179           <option value="IntervalStratification" /> | 
|  | 180         </param> | 
|  | 181         <param name="do_not_use_all_standard_stratifications" checked="false" type="boolean" truevalue="--doNotUseAllStandardStratifications" falsevalue="" label="Do not use the standard stratification modules by default" help="-noST,--doNotUseAllStandardStratifications" /> | 
|  | 182 | 
|  | 183         <repeat name="only_variants_of_type" title="only Variants Of Type" help="--onlyVariantsOfType"> | 
|  | 184           <param name="variant_type" type="text" value="" label="only variants of these types will be considered during the evaluation"/> | 
|  | 185         </repeat> | 
|  | 186 | 
|  | 187         <param name="eval_modules" type="select" multiple="True" display="checkboxes" label="Eval modules to apply to the eval track(s)" help="-EV,--evalModule &lt;evalModule&gt;" > | 
|  | 188           <!-- do these need individual options also? gatk wiki has little info --> | 
|  | 189           <option value="ACTransitionTable" /> | 
|  | 190           <option value="AlleleFrequencyComparison" /> | 
|  | 191           <option value="AminoAcidTransition" /> | 
|  | 192           <option value="CompOverlap" /> | 
|  | 193           <option value="CountVariants" /> | 
|  | 194           <option value="GenotypeConcordance" /> | 
|  | 195           <option value="GenotypePhasingEvaluator" /> | 
|  | 196           <option value="IndelMetricsByAC" /> | 
|  | 197           <option value="IndelStatistics" /> | 
|  | 198           <option value="MendelianViolationEvaluator" /> | 
|  | 199           <option value="PrintMissingComp" /> | 
|  | 200           <option value="PrivatePermutations" /> | 
|  | 201           <option value="SimpleMetricsByAC" /> | 
|  | 202           <option value="ThetaVariantEvaluator" /> | 
|  | 203           <option value="TiTvVariantEvaluator" /> | 
|  | 204           <option value="VariantQualityScore" /> | 
|  | 205         </param> | 
|  | 206         <param name="do_not_use_all_standard_modules" checked="false" type="boolean" truevalue="--doNotUseAllStandardModules" falsevalue="" label="Do not use the standard eval modules by default" help="-noEV,--doNotUseAllStandardModules" /> | 
|  | 207 | 
|  | 208         <param name="num_samples" type="integer" label="Number of samples (used if no samples are available in the VCF file)" value="0" help="-ns,--numSamples &lt;numSamples&gt;"/> <!-- the default 0 means the numSamples flag is omitted from the command --> | 
|  | 209         <param name="min_phase_quality" type="float" label="Minimum phasing quality " value="10.0" help="-mpq,--minPhaseQuality &lt;minPhaseQuality&gt;"/> | 
|  | 210         <param name="family" type="text" value="" label="If provided, genotypes will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined" help="--family_structure"/> <!-- emitted as family_structure only when non-empty --> | 
|  | 211         <param name="mendelian_violation_qual_threshold" type="integer" label="Minimum genotype QUAL score for each trio member required to accept a site as a violation" value="50" help="-mvq,--mendelianViolationQualThreshold &lt;mendelianViolationQualThreshold&gt;"/> | 
|  | 212         <param name="ancestral_alignments" type="data" format="fasta" optional="True" label="Fasta file with ancestral alleles" help="-aa,--ancestralAlignments &lt;ancestralAlignments&gt;" /> | 
|  | 213         <param name="known_cnvs" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features describing a known list of copy number variants" help="-knownCNVs,--knownCNVs &lt;knownCNVs&gt;" /> | 
|  | 214         <param name="strat_intervals" type="data" format="bed,gatk_interval,picard_interval_list" optional="True" label="File containing tribble-readable features for the IntervalStratification" help="-stratIntervals,--stratIntervals &lt;stratIntervals&gt;" /> <!-- passed as the stratIntervals input only when a dataset is selected --> | 
|  | 215 | 
|  | 216     </expand> | 
|  | 217 | 
|  | 218 | 
|  | 219   </inputs> | 
|  | 220   <outputs> | 
|  | 221     <data name="output_report" format="gatk_report" label="${tool.name} on ${on_string} (report)" /> <!-- target of the out argument in the command template --> | 
|  | 222     <data name="output_log" format="txt" label="${tool.name} on ${on_string} (log)" /> <!-- target of the wrapper stdout argument in the command template --> | 
|  | 223   </outputs> | 
|  | 224   <tests> | 
|  | 225       <test> <!-- history reference (phiX.fasta), one eval VCF, dbSNP ROD marked as known, basic GATK and analysis options --> | 
|  | 226           <param name="reference_source_selector" value="history" /> | 
|  | 227           <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | 
|  | 228           <param name="input_variant" value="gatk/gatk_variant_annotator/gatk_variant_annotator_out_1.vcf" ftype="vcf" /> | 
|  | 229           <param name="dbsnp_rod_bind_type_selector" value="set_dbsnp" /> | 
|  | 230           <param name="dbsnp_input_rod" value="gatk/fake_phiX_variant_locations.vcf" ftype="vcf" /> | 
|  | 231           <param name="dbsnp_known_names" value="True"/> | 
|  | 232           <param name="comp_rod_bind" value="0" /> <!-- presumably means zero comparison ROD entries; confirm against the test framework --> | 
|  | 233           <param name="gatk_param_type_selector" value="basic" /> | 
|  | 234           <param name="analysis_param_type_selector" value="basic" /> | 
|  | 235           <output name="output_report" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.gatk_report" /> | 
|  | 236           <output name="output_log" file="gatk/gatk_variant_eval/gatk_variant_eval_out_1.log.contains" compare="contains" /> <!-- log is checked with compare="contains" rather than an exact match --> | 
|  | 237       </test> | 
|  | 238   </tests> | 
|  | 239   <help> | 
|  | 240 **What it does** | 
|  | 241 | 
|  | 242 General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more) | 
|  | 243 | 
|  | 244 For more information on using the VariantEval module, see this `tool specific page <http://www.broadinstitute.org/gsa/wiki/index.php/VariantEval>`_. | 
|  | 245 | 
|  | 246 To learn about best practices for variant detection using GATK, see this `overview <http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3>`_. | 
|  | 247 | 
|  | 248 If you encounter errors, please view the `GATK FAQ <http://www.broadinstitute.org/gsa/wiki/index.php/Frequently_Asked_Questions>`_. | 
|  | 249 | 
|  | 250 ------ | 
|  | 251 | 
|  | 252 **Inputs** | 
|  | 253 | 
|  | 254 GenomeAnalysisTK: VariantEval accepts variant files as input. | 
|  | 255 | 
|  | 256 | 
|  | 257 **Outputs** | 
|  | 258 | 
|  | 259 The output is a table of variant evaluation. | 
|  | 260 | 
|  | 261 | 
|  | 262 Go `here <http://www.broadinstitute.org/gsa/wiki/index.php/Input_files_for_the_GATK>`_ for details on GATK file formats. | 
|  | 263 | 
|  | 264 | 
|  | 265 ------- | 
|  | 266 | 
|  | 267 **Settings**:: | 
|  | 268 | 
|  | 269  out                                   An output file presented to the walker. Will overwrite contents if file exists. | 
|  | 270  list                                  List the available eval modules and exit | 
|  | 271  select_exps                           One or more stratifications to use when evaluating the data | 
|  | 272  select_names                          Names to use for the list of stratifications (must be a 1-to-1 mapping) | 
|  | 273  sample                                Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context | 
|  | 274  known_names                           Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets | 
|  | 275  stratificationModule                  One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noST is specified) | 
|  | 276  doNotUseAllStandardStratifications    Do not use the standard stratification modules by default (instead, only those that are specified with the -ST option) | 
|  | 277  onlyVariantsOfType                    If provided, only variants of these types will be considered during the evaluation | 
|  | 278  evalModule                            One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noEV is specified) | 
|  | 279  doNotUseAllStandardModules            Do not use the standard modules by default (instead, only those that are specified with the -EV option) | 
|  | 280  numSamples                            Number of samples (used if no samples are available in the VCF file) | 
|  | 281  minPhaseQuality                       Minimum phasing quality | 
|  | 282  family_structure                      If provided, genotypes will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined | 
|  | 283  mendelianViolationQualThreshold       Minimum genotype QUAL score for each trio member required to accept a site as a violation | 
|  | 284  ancestralAlignments                   Fasta file with ancestral alleles | 
|  | 285 | 
|  | 286 @CITATION_SECTION@ | 
|  | 287   </help> | 
|  | 288 </tool> |