Mercurial > repos > iuc > stacks2_gstacks
comparison stacks_gstacks.xml @ 0:6e4b604a1920 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit b395fa36fa826e26085820ba3a9faacaeddcb460
| author | iuc |
|---|---|
| date | Mon, 01 Jul 2019 10:50:24 -0400 |
| parents | |
| children | 8b6aa27346b7 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6e4b604a1920 |
|---|---|
| 1 <tool id="stacks2_gstacks" name="Stacks2: gstacks" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@"> | |
| 2 <description>Call variants, genotypes and haplotypes</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"> | |
| 7 <requirement type="package" version="1.9">samtools</requirement> | |
| 8 </expand> | |
| 9 <expand macro="version_cmd"/> | |
| 10 <command detect_errors="aggressive"><![CDATA[ | |
| 11 @FASTQ_INPUT_FUNCTIONS@ | |
| 12 | |
| 13 mkdir bam_inputs stacks_outputs && | |
| 14 #if $mode_cond.mode_select == "denovo" and not $popmap: | |
| 15 ## since collections have no len .. yet | |
| 16 #try: | |
| 17 #set count = len($input_bam) | |
| 18 #except: | |
| 19 #set count = len($input_bam.keys()) | |
| 20 #end try | |
| 21 #if count == 1: | |
| 22 #for $bam in $input_bam: | |
| 23 ln -s '$bam' bam_inputs/catalog.bam && | |
| 24 #end for | |
| 25 #else | |
| 26 >&2 echo "exactly one (merged) bam file is needed in denovo mode if no population map is given" && | |
| 27 exit 1 && | |
| 28 #end if | |
| 29 #else | |
| 30 @BAM_INPUT@ | |
| 31 #end if | |
| 32 | |
| 33 gstacks | |
| 34 | |
| 35 #if $mode_cond.mode_select == "denovo": | |
| 36 -P bam_inputs | |
| 37 $mode_cond.ignore_pe_reads | |
| 38 #if $mode_cond.advanced_cond.advanced_select == "yes": | |
| 39 --kmer-length $mode_cond.advanced_cond.kmer_length | |
| 40 --max-debruijn-reads $mode_cond.advanced_cond.max_debruijn_reads | |
| 41 --min-kmer-cov $mode_cond.advanced_cond.min_kmer_cov | |
| 42 $mode_cond.advanced_cond.write_alignments | |
| 43 #end if | |
| 44 #else: | |
| 45 #if $popmap | |
| 46 -I bam_inputs | |
| 47 #else | |
| 48 $bamlist | |
| 49 #end if | |
| 50 #if $mode_cond.paired_cond.paired_select == '' | |
| 51 $mode_cond.paired_cond.rm_unpaired_reads | |
| 52 $mode_cond.paired_cond.rm_pcr_duplicates | |
| 53 #else: | |
| 54 $mode_cond.paired_cond.paired_select | |
| 55 #end if | |
| 56 #if $mode_cond.advanced_cond.advanced_select == "yes": | |
| 57 --min-mapq $mode_cond.advanced_cond.min_mapq | |
| 58 --max-clipped $mode_cond.advanced_cond.max_clipped | |
| 59 --max-insert-len $mode_cond.advanced_cond.max_insert_len | |
| 60 $mode_cond.advanced_cond.details | |
| 61 --phasing-cooccurrences-thr-range $mode_cond.advanced_cond.phasing_cooccurrences_thr_min,$mode_cond.advanced_cond.phasing_cooccurrences_thr_max | |
| 62 $mode_cond.advanced_cond.phasing_dont_prune_hets | |
| 63 #end if | |
| 64 #end if | |
| 65 #if $popmap | |
| 66 -M '$popmap' | |
| 67 #end if | |
| 68 -O stacks_outputs | |
| 69 -t \${GALAXY_SLOTS:-1} | |
| 70 | |
| 71 ##Model options: | |
| 72 --model $model_cond.model | |
| 73 --var-alpha $model_cond.var_alpha | |
| 74 --gt-alpha $model_cond.gt_alpha | |
| 75 | |
| 76 | |
| 77 ## the bam files generated by gstacks (--write-alignments) are seemingly buggy | |
| 78 ## (https://groups.google.com/d/msg/stacks-users/CazwJY1DPGA/7vuahiB2GgAJ) | |
| 79 ## so we fix them temporarily by piping them through samtools view (disabling all | |
| 80 ## exit codes and stderr output) this adds the samtools requirement | |
| 81 ## for later versions where this is fixed the output bam files could just be moved | |
| 82 ## to stacks_outputs if this is still necessary | |
| 83 #if $mode_cond.mode_select == "denovo" and $mode_cond.advanced_cond.advanced_select == "yes" and $mode_cond.advanced_cond.write_alignments != "" | |
| 84 #if $popmap: | |
| 85 && for b in bam_inputs/*alns.bam; do (samtools view -b "\$b" || true) 2> /dev/null > stacks_outputs/\$(basename "\$b"); done | |
| 86 #else | |
| 87 && (samtools view -b bam_inputs/alignments.bam || true) 2> /dev/null > stacks_outputs/alignments.bam | |
| 88 #end if | |
| 89 #end if | |
| 90 | |
| 91 | |
| 92 ## annoyingly gstacks creates stacks_outputs/gstacks.log | |
| 93 ## instead of just writing to stderr as the other tools | |
| 94 ## hence we do not use the tokens and instead move gstacks.log to the log output when it is requested (path quoted for the shell) | |
| 95 #if $output_log | |
| 96 && mv stacks_outputs/gstacks.log '$output_log' | |
| 97 #end if | |
| 98 | |
| 99 @EXTRACT_VCF@ | |
| 100 | |
| 101 ## TODO extract individual distributions from stacks_outputs/gstacks.log.distribs | |
| 102 ## alternative extra tool | |
| 103 ## for i in \$(stacks-dist-extract stacks_outputs/gstacks.log.distribs) | |
| 104 ## do | |
| 105 ## stacks-dist-extract stacks_outputs/gstacks.log.distribs $i > stacks_outputs/gstacks.log.\$i.tsv | |
| 106 ## done | |
| 107 ## TODO make optional output collection | |
| 108 ]]></command> | |
| 109 | |
| 110 <inputs> | |
| 111 <expand macro="bam_input_macro"/> | |
| 112 <param name="popmap" type="data" format="tabular,txt" label="Population map" help="If set, matching will be done only for samples listed in this file" optional="true" argument="-M" /> | |
| 113 | |
| 114 <conditional name="mode_cond"> | |
| 115 <param name="mode_select" type="select" label="Mode"> | |
| 116 <option value="denovo" selected="true">De novo mode</option> | |
| 117 <option value="refbased">Reference-based</option> | |
| 118 </param> | |
| 119 <when value="denovo"> | |
| 120 <param argument="--ignore-pe-reads" name="ignore_pe_reads" type="boolean" checked="false" truevalue="--ignore-pe-reads" falsevalue="" label="Ignore paired-end reads" help="ignore paired-end reads even if present in the input" /> | |
| 121 <conditional name="advanced_cond"> | |
| 122 <param name="advanced_select" type="select" label="Advanced options"> | |
| 123 <option value="no">No</option> | |
| 124 <option value="yes">Yes</option> | |
| 125 </param> | |
| 126 <when value="yes"> | |
| 127 <param argument="--kmer-length" name="kmer_length" type="integer" value="31" min="2" max="31" label="K-mer length for the de Bruijn graph" /> | |
| 128 <param argument="--max-debruijn-reads" name="max_debruijn_reads" type="integer" value="1000" min="1" label="Maximum number of reads to use in the de Bruijn graph" /> | |
| 129 <param argument="--min-kmer-cov" name="min_kmer_cov" type="integer" value="2" label="Minimum coverage to consider a kmer" /> | |
| 130 <param argument="--write-alignments" name="write_alignments" type="boolean" checked="false" truevalue="--write-alignments" falsevalue="" label="save read alignments" help="heavy BAM files"/> | |
| 131 </when> | |
| 132 <when value="no"/> | |
| 133 </conditional> | |
| 134 </when> | |
| 135 <when value="refbased"> | |
| 136 <!-- paired end handling: the empty value is the only default and exposes the two rm-* filters; the other two values are written to the gstacks command line as they are --> <conditional name="paired_cond"> | |
| 137 <param name="paired_select" type="select" label="Paired end options" help="select single/paired for single end data or to select advanced paired end options, --unpaired: treat reverse reads as if they were forward reads; --ignore-pe-reads: ignore paired-end reads even if present in the input"> | |
| 138 <option value="" selected="true">single/paired</option> | |
| 139 <option value="--unpaired">ignore read pairing (--unpaired)</option> | |
| 140 <option value="--ignore-pe-reads">ignore paired-end reads (--ignore-pe-reads)</option> | |
| 141 </param> | |
| 142 <when value=""> | |
| 143 <param argument="--rm-unpaired-reads" name="rm_unpaired_reads" type="boolean" checked="false" truevalue="--rm-unpaired-reads" falsevalue="" label="Discard unpaired reads" /> | |
| 144 <param argument="--rm-pcr-duplicates" name="rm_pcr_duplicates" type="boolean" checked="false" truevalue="--rm-pcr-duplicates" falsevalue="" label="Remove read pairs of the same sample that have the same insert length" help="implies --rm-unpaired-reads" /> | |
| 145 </when> | |
| 146 <when value="--unpaired"/> | |
| 147 <when value="--ignore-pe-reads"/> | |
| 148 </conditional> | |
| 149 <conditional name="advanced_cond"> | |
| 150 <param name="advanced_select" type="select" label="Advanced options"> | |
| 151 <option value="no">No</option> | |
| 152 <option value="yes">Yes</option> | |
| 153 </param> | |
| 154 <when value="yes"> | |
| 155 <param argument="--min-mapq" name="min_mapq" type="integer" value="10" min="0" max="255" label="Minimum PHRED-scaled mapping quality to consider a read" /> | |
| 156 <param argument="--max-clipped" name="max_clipped" type="float" value="0.2" min="0.0" max="1.1" label="Maximum soft-clipping level" help="in fraction of read length" /> | |
| 157 <param argument="--max-insert-len" name="max_insert_len" type="integer" value="1000" min="0" label="Maximum allowed sequencing insert length" /> | |
| 158 <param argument="--details" name="details" type="boolean" checked="false" truevalue="--details" falsevalue="" label="Write a heavier output" /> | |
| 159 <param name="phasing_cooccurrences_thr_min" type="integer" value="1" min="0" label="Edge coverage min" help="(--phasing-cooccurrences-thr-range)" /> | |
| 160 <param name="phasing_cooccurrences_thr_max" type="integer" value="2" min="0" label="Edge coverage max" help="range of edge coverage thresholds to iterate over when building the graph of allele cooccurrences for SNP phasing (--phasing-cooccurrences-thr-range)"/> | |
| 161 <param argument="--phasing-dont-prune-hets" name="phasing_dont_prune_hets" type="boolean" checked="false" truevalue="--phasing-dont-prune-hets" falsevalue="" label="Don't try to ignore dubious heterozygote genotypes during phasing" /> | |
| 162 </when> | |
| 163 <when value="no"/> | |
| 164 </conditional> | |
| 165 </when> | |
| 166 </conditional> | |
| 167 | |
| 168 <conditional name="model_cond"> | |
| 169 <param argument="--model" type="select" label="Model to use to call variants and genotypes"> | |
| 170 <option value="marukilow" selected="true">marukilow</option> | |
| 171 <option value="marukihigh">marukihigh</option> | |
| 172 <option value="snp">snp</option> | |
| 173 </param> | |
| 174 <when value="marukilow"> | |
| 175 <expand macro="variant_calling_options_vg" varalpha_default="0.01"/> | |
| 176 </when> | |
| 177 <when value="marukihigh"> | |
| 178 <expand macro="variant_calling_options_vg"/> | |
| 179 </when> | |
| 180 <when value="snp"> | |
| 181 <expand macro="variant_calling_options_vg"/> | |
| 182 </when> | |
| 183 </conditional> | |
| 184 <param name="add_log_distribs" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Add log distribs output as dataset" /> | |
| 185 <expand macro="in_log"/> | |
| 186 </inputs> | |
| 187 <outputs> | |
| 188 <expand macro="out_log"/> | |
| 189 <expand macro="gstacks_outputs_full_macro"/> | |
| 190 </outputs> | |
| 191 | |
| 192 <tests> | |
| 193 <!-- denovomode, w popmap --> | |
| 194 <test expect_num_outputs="3"> | |
| 195 <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.matches.bam,tsv2bam/PopA_02.matches.bam"/> | |
| 196 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
| 197 <conditional name="mode_cond"> | |
| 198 <param name="mode_select" value="denovo"/> | |
| 199 </conditional> | |
| 200 <param name="add_log" value="yes" /> | |
| 201 <param name="add_log_distribs" value="yes" /> | |
| 202 <output name="output_log" ftype="txt" file="gstacks/gstacks.log" lines_diff="8"/> | |
| 203 <output name="distribs" ftype="txt" file="gstacks/gstacks.log.distribs" compare="sim_size"/> | |
| 204 <output_collection name="gstacks_out" type="list" count="2"> | |
| 205 <element name="catalog.calls.vcf" file="gstacks/catalog.calls.vcf" ftype="vcf" lines_diff="2"/> | |
| 206 <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz"/> | |
| 207 </output_collection> | |
| 208 </test> | |
| 209 <!-- denovomode, w popmap, write alignments --> | |
| 210 <test expect_num_outputs="3"> | |
| 211 <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.matches.bam,tsv2bam/PopA_02.matches.bam"/> | |
| 212 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
| 213 <conditional name="mode_cond"> | |
| 214 <param name="mode_select" value="denovo"/> | |
| 215 <conditional name="advanced_cond"> | |
| 216 <param name="advanced_select" value="yes"/> | |
| 217 <param name="write_alignments" value="--write-alignments" /> | |
| 218 </conditional> | |
| 219 </conditional> | |
| 220 <param name="add_log" value="yes" /> | |
| 221 <assert_command> | |
| 222 <has_text text="--write-alignments" /> | |
| 223 </assert_command> | |
| 224 <output name="output_log" ftype="txt"><assert_contents><has_text text="done." /></assert_contents></output> | |
| 225 <output_collection name="gstacks_out" type="list" count="2"/> | |
| 226 <output_collection name="gstacks_alns_out" type="list" count="2"> | |
| 227 <element name="PopA_01" file="gstacks/PopA_01.alns.bam" ftype="bam" /> | |
| 228 <element name="PopA_02" file="gstacks/PopA_02.alns.bam" ftype="bam" /> | |
| 229 </output_collection> | |
| 230 </test> | |
| 231 <!-- denovomode, wo popmap (allows for only one input), ignore PE, advanced, alt model; \-\-rm-pcr-duplicates is not emitted by the denovo branch of the command, so it must be absent --> | |
| 232 <test expect_num_outputs="3"> | |
| 233 <param name="input_bam" value="tsv2bam/PopA_01.matches.bam" ftype="bam"/> | |
| 234 <conditional name="mode_cond"> | |
| 235 <param name="mode_select" value="denovo" /> | |
| 236 <param name="ignore_pe_reads" value="--ignore-pe-reads" /> | |
| 237 <conditional name="advanced_cond"> | |
| 238 <param name="advanced_select" value="yes"/> | |
| 239 <param name="kmer_length" value="23" /> | |
| 240 <param name="max_debruijn_reads" value="666"/> | |
| 241 <param name="min_kmer_cov" value="3" /> | |
| 242 <param name="write_alignments" value="--write-alignments" /> | |
| 243 </conditional> | |
| 244 </conditional> | |
| 245 <conditional name="model_cond"> | |
| 246 <param name="model" value="marukihigh"/> | |
| 247 <param name="var_alpha" value="0.1" /> | |
| 248 <param name="gt_alpha" value="0.1" /> | |
| 249 </conditional> | |
| 250 <param name="add_log" value="yes" /> | |
| 251 <assert_command> | |
| 252 <has_text text="--ignore-pe-reads" /> | |
| 253 <not_has_text text="--rm-pcr-duplicates" /> | |
| 254 <has_text text="--kmer-length 23" /> | |
| 255 <has_text text="--max-debruijn-reads 666" /> | |
| 256 <has_text text="--min-kmer-cov 3" /> | |
| 257 <has_text text="--write-alignments" /> | |
| 258 <has_text text="--model marukihigh" /> | |
| 259 <has_text text="--var-alpha 0.1" /> | |
| 260 <has_text text="--gt-alpha 0.1" /> | |
| 261 </assert_command> | |
| 262 <output name="output_log" ftype="txt"><assert_contents><has_text text="done." /></assert_contents></output> | |
| 263 <output_collection name="gstacks_out" type="list" count="2"/> | |
| 264 <output name="gstacks_aln_out" ftype="bam" file="gstacks/alignments.bam" /> | |
| 265 </test> | |
| 266 <!-- refbased wo popmap, paired options, removing all unpaired reads results in an error --> | |
| 267 <test expect_failure="true" expect_exit_code="1"> | |
| 268 <param name="input_bam" value="tsv2bam/PopA_01.bam,tsv2bam/PopA_02.bam"/> | |
| 269 <conditional name="mode_cond"> | |
| 270 <param name="mode_select" value="refbased"/> | |
| 271 <conditional name="paired_cond"> | |
| 272 <param name="paired_select" value=""/> | |
| 273 <!--<param name="rm_unpaired_reads" value="\-\-rm-unpaired-reads" /> removes too much of the test data and gstacks fails--> | |
| 274 <param name="rm_pcr_duplicates" value="--rm-pcr-duplicates" /> | |
| 275 </conditional> | |
| 276 </conditional> | |
| 277 <param name="add_log" value="yes" /> | |
| 278 <assert_command> | |
| 279 <has_text text="-I bam_inputs" /> | |
| 280 <not_has_text text="-B " /> | |
| 281 <has_text text="--rm-unpaired-reads" /> | |
| 282 <has_text text="--rm-pcr-duplicates" /> | |
| 283 </assert_command> | |
| 284 </test> | |
| 285 <!-- refbased w popmap (here bam names need to be equal to sample names in popmap), \-\-unpaired, advanced (incl. \-\-details, the truevalue of the details parameter), snp model --> | |
| 286 <test expect_num_outputs="2"> | |
| 287 <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.bam,tsv2bam/PopA_02.bam"/> | |
| 288 <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> | |
| 289 <conditional name="mode_cond"> | |
| 290 <param name="mode_select" value="refbased"/> | |
| 291 <conditional name="paired_cond"> | |
| 292 <param name="paired_select" value="--unpaired"/> | |
| 293 </conditional> | |
| 294 <conditional name="advanced_cond"> | |
| 295 <param name="advanced_select" value="yes" /> | |
| 296 <param name="min_mapq" value="23" /> | |
| 297 <param name="max_clipped" value="0.23" /> | |
| 298 <param name="max_insert_len" value="666" /> | |
| 299 <param name="details" value="--details"/> | |
| 300 <param name="phasing_cooccurrences_thr_min" value="2"/> | |
| 301 <param name="phasing_cooccurrences_thr_max" value="3"/> | |
| 302 <param name="phasing_dont_prune_hets" value="--phasing-dont-prune-hets" /> | |
| 303 </conditional> | |
| 304 </conditional> | |
| 305 <param name="model_cond|model" value="snp"/> | |
| 306 <param name="model_cond|gt_alpha" value="0.1" /> | |
| 307 <param name="model_cond|var_alpha" value="0.1" /> | |
| 308 <param name="add_log" value="yes" /> | |
| 309 <assert_command> | |
| 310 <not_has_text text="-I bam_inputs" /> | |
| 311 <has_text text="-B " /> | |
| 312 <has_text text="--unpaired" /> | |
| 313 <has_text text="--min-mapq 23" /> | |
| 314 <has_text text="--max-clipped 0.23" /> | |
| 315 <has_text text="--max-insert-len 666" /> | |
| 316 <has_text text="--details" /> | |
| 317 <has_text text="--phasing-cooccurrences-thr-range 2,3" /> | |
| 318 <has_text text="--phasing-dont-prune-hets" /> | |
| 319 <has_text text="--model snp" /> | |
| 320 <has_text text="--gt-alpha 0.1" /> | |
| 321 </assert_command> | |
| 322 <output name="output_log" ftype="txt"><assert_contents><has_text text="done." /></assert_contents></output> | |
| 323 <output_collection name="gstacks_out" type="list" count="2"/> | |
| 324 </test> | |
| 325 <!-- refbased wo popmap (here bam names don't matter), \-\-ignorepe --> | |
| 326 <test expect_num_outputs="2"> | |
| 327 <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.bam,tsv2bam/PopA_02.bam"/> | |
| 328 <conditional name="mode_cond"> | |
| 329 <param name="mode_select" value="refbased"/> | |
| 330 <conditional name="paired_cond"> | |
| 331 <param name="paired_select" value="--ignore-pe-reads"/> | |
| 332 </conditional> | |
| 333 </conditional> | |
| 334 <param name="add_log" value="yes" /> | |
| 335 <assert_command> | |
| 336 <has_text text="-I bam_inputs" /> | |
| 337 <not_has_text text="-B " /> | |
| 338 <has_text text="--ignore-pe-reads" /> | |
| 339 </assert_command> | |
| 340 <output name="output_log"><assert_contents><has_text text="gstacks is done." /></assert_contents></output> | |
| 341 <output_collection name="gstacks_out" type="list" count="2"/> | |
| 342 </test> | |
| 343 </tests> | |
| 344 | |
| 345 <help> | |
| 346 <![CDATA[ | |
| 347 .. class:: infomark | |
| 348 | |
| 349 **What it does** | |
| 350 | |
| 351 For de novo analyses, this program will pull in paired-end reads, if available, | |
| 352 assemble the paired-end contig and merge it with the single-end locus, align | |
| 353 reads to the locus, and call SNPs. | |
| 354 | |
| 355 For reference-aligned analyses, this program will build loci from the single | |
| 356 and/or paired-end reads before calling SNPs. The single- and paired-end reads | |
| 357 must be aligned and stored together in the input BAM or SAM files and the | |
| 358 reads must be sorted. The gstacks program will detect if single- or paired-end | |
| 359 reads are present. | |
| 360 | |
| 361 In either mode, gstacks is able to remove PCR duplicates if requested. | |
| 362 | |
| 363 -------- | |
| 364 | |
| 365 **Input files** | |
| 366 | |
| 367 If a population map is given, BAM records must be assigned to samples using BAM "read groups" | |
| 368 (gstacks uses the ID/identifier and SM/sample name fields). Read groups | |
| 369 must be consistent if repeated across different files. | |
| 370 Otherwise read groups are unneeded and ignored. | |
| 371 | |
| 372 **Output files** | |
| 373 | |
| 374 - Assembled contigs and variant sites | |
| 375 | |
| 376 - Optional outputs: Read alignments and log.distribs | |
| 377 | |
| 378 @STACKS_INFOS@ | |
| 379 ]]> | |
| 380 </help> | |
| 381 <expand macro="citation" /> | |
| 382 </tool> |
