Mercurial > repos > matthias > stacks2_gstacks
diff stacks_gstacks.xml @ 0:ce90584be117 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 98327d2948ae1ccb5aef5db9ab88605fd74a0de7-dirty
author | matthias |
---|---|
date | Thu, 29 Nov 2018 11:52:48 -0500 |
parents | |
children | 192f1d5f301e |
line wrap: on
line diff
<tool id="stacks2_gstacks" name="Stacks2: gstacks" version="@WRAPPER_VERSION@">
    <!--
        Galaxy wrapper for the Stacks2 "gstacks" program.

        The BAM inputs are linked into ./bam_inputs, gstacks writes into
        ./stacks_outputs, and the log / distribs / catalog files are then
        collected from there. Shared tokens (@CLEAN_EXT@, @BAM_INPUT@,
        @STACKS_INFOS@, @WRAPPER_VERSION@) and the expanded macros are
        defined in macros.xml.
    -->
    <description>call variants, genotypes and haplotypes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
@CLEAN_EXT@

mkdir bam_inputs stacks_outputs &&

## De novo mode without a population map reads a single (merged) BAM file
## that has to be named catalog.bam inside the input directory.
#if $mode_cond.mode_select == "denovo" and not $popmap:
    #if $input_type.input_type_selector == 'manual':
        #set count = len($input_type.input_bam)
    #else:
        #set count = len($input_type.input_bam.keys())
    #end if
    #if count == 1:
        #for $bam in $input_type.input_bam:
            ln -s '$bam' bam_inputs/catalog.bam &&
        #end for
    #else:
        >&2 echo "exactly one (merged) bam file is needed in denovo mode if no population map is given" &&
        exit 1 &&
    #end if
#else:
    ## all other cases: link the inputs via the shared macro token
    @BAM_INPUT@
#end if

gstacks

#if $mode_cond.mode_select == "denovo":
    -P bam_inputs
    $mode_cond.ignore_pe_reads
    #if $mode_cond.advanced_cond.advanced_select == "yes":
        --kmer-length $mode_cond.advanced_cond.kmer_length
        --max-debruijn-reads $mode_cond.advanced_cond.max_debruijn_reads
        --min-kmer-cov $mode_cond.advanced_cond.min_kmer_cov
    #end if
#else:
    ## reference based mode: with a population map the BAM files are taken
    ## from the input directory (-I), otherwise the value of bamlist is used
    #if $popmap:
        -I bam_inputs
    #else:
        $bamlist
    #end if
    ## an empty paired_select value means "single/paired": only then the
    ## two rm-* switches are available
    #if $mode_cond.paired_cond.paired_select == '':
        $mode_cond.paired_cond.rm_unpaired_reads
        $mode_cond.paired_cond.rm_pcr_duplicates
    #else:
        $mode_cond.paired_cond.paired_select
    #end if
    #if $mode_cond.advanced_cond.advanced_select == "yes":
        --min-mapq $mode_cond.advanced_cond.min_mapq
        --max-clipped $mode_cond.advanced_cond.max_clipped
        --max-insert-len $mode_cond.advanced_cond.max_insert_len
        $mode_cond.advanced_cond.details
        --phasing-cooccurrences-thr-range $mode_cond.advanced_cond.phasing_cooccurrences_thr_min,$mode_cond.advanced_cond.phasing_cooccurrences_thr_max
        $mode_cond.advanced_cond.phasing_dont_prune_hets
    #end if
#end if
#if $popmap:
    -M '$popmap'
#end if
-O stacks_outputs
-t \${GALAXY_SLOTS:-1}

## Model options (var-alpha is not passed for the snp model):
--model $model_cond.model
#if $model_cond.model != "snp":
    --var-alpha $model_cond.var_alpha
#end if
--gt-alpha $model_cond.gt_alpha

## annoyingly gstacks creates stacks_outputs/gstacks.log
## instead of just writing to stderr as the other tools
## hence we do not use the tokens and return gstacks.log as log file
&& mv stacks_outputs/gstacks.log '$output_log'

## the catalog.calls output is a gzip-ed vcf: extract it
## to make it usable in Galaxy (with the downside that we
## need to gzip it again for downstream calls like populations)
&& gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf

## TODO extract individual distributions from stacks_outputs/gstacks.log.distribs
## alternative extra tool
## for i in \$(stacks-dist-extract stacks_outputs/gstacks.log.distribs)
## do
##     stacks-dist-extract stacks_outputs/gstacks.log.distribs $i > stacks_outputs/gstacks.log.\$i.tsv
## done
## TODO make optional output collection
    ]]></command>

    <inputs>
        <expand macro="bam_input_macro"/>
        <param name="popmap" type="data" format="tabular,txt" label="Population map" help="If set, matching will be done only for samples listed in this file" optional="true" argument="-M" />

        <!-- de novo vs. reference based mode; each mode has its own advanced options -->
        <conditional name="mode_cond">
            <param name="mode_select" type="select" label="Mode">
                <option value="denovo" selected="true">De novo mode</option>
                <option value="refbased">Reference-based</option>
            </param>
            <when value="denovo">
                <param argument="--ignore-pe-reads" name="ignore_pe_reads" type="boolean" checked="false" truevalue="--ignore-pe-reads" falsevalue="" label="ignore paired-end reads" help="ignore paired-end reads even if present in the input" />
                <conditional name="advanced_cond">
                    <param name="advanced_select" type="select" label="Advanced options">
                        <option value="no">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="yes">
                        <param argument="--kmer-length" name="kmer_length" type="integer" value="31" min="2" max="31" label="kmer length for the de Bruijn graph" />
                        <param argument="--max-debruijn-reads" name="max_debruijn_reads" type="integer" value="1000" min="1" label="maximum number of reads to use in the de Bruijn graph" />
                        <param argument="--min-kmer-cov" name="min_kmer_cov" type="integer" value="2" label="minimum coverage to consider a kmer" />
                    </when>
                    <when value="no"/>
                </conditional>
            </when>
            <when value="refbased">
                <conditional name="paired_cond">
                    <!-- the option values are inserted verbatim into the command line; only the empty default is preselected -->
                    <param name="paired_select" type="select" label="paired end options" help="select single/paired for single end data or to select advanced paired end options, --unpaired: treat reverse reads as if they were forward reads; --ignore-pe-reads: ignore paired-end reads even if present in the input">
                        <option value="" selected="true">single/paired</option>
                        <option value="--unpaired">ignore read pairing (--unpaired)</option>
                        <option value="--ignore-pe-reads">ignore paired-end reads (--ignore-pe-reads)</option>
                    </param>
                    <when value="">
                        <param argument="--rm-unpaired-reads" name="rm_unpaired_reads" type="boolean" checked="false" truevalue="--rm-unpaired-reads" falsevalue="" label="discard unpaired reads" />
                        <param argument="--rm-pcr-duplicates" name="rm_pcr_duplicates" type="boolean" checked="false" truevalue="--rm-pcr-duplicates" falsevalue="" label="remove read pairs of the same sample that have the same insert length" help="implies --rm-unpaired-reads" />
                    </when>
                    <when value="--unpaired"/>
                    <when value="--ignore-pe-reads"/>
                </conditional>
                <conditional name="advanced_cond">
                    <param name="advanced_select" type="select" label="Advanced options">
                        <option value="no">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="yes">
                        <param argument="--min-mapq" name="min_mapq" type="integer" value="10" min="0" max="255" label="minimum PHRED-scaled mapping quality to consider a read" />
                        <!-- a fraction of the read length, hence limited to [0, 1] -->
                        <param argument="--max-clipped" name="max_clipped" type="float" value="0.2" min="0.0" max="1.0" label="maximum soft-clipping level" help="in fraction of read length" />
                        <param argument="--max-insert-len" name="max_insert_len" type="integer" value="1000" min="0" label="maximum allowed sequencing insert length" />
                        <param argument="--details" type="boolean" checked="false" truevalue="--details" falsevalue="" label="write a heavier output" />
                        <!-- min and max are joined with a comma into a single range argument in the command -->
                        <param name="phasing_cooccurrences_thr_min" type="integer" value="1" min="0" label="edge coverage min" help="(--phasing-cooccurrences-thr-range)" />
                        <param name="phasing_cooccurrences_thr_max" type="integer" value="2" min="0" label="edge coverage max" help="range of edge coverage thresholds to iterate over when building the graph of allele cooccurrences for SNP phasing (--phasing-cooccurrences-thr-range)"/>
                        <param argument="--phasing-dont-prune-hets" name="phasing_dont_prune_hets" type="boolean" checked="false" truevalue="--phasing-dont-prune-hets" falsevalue="" label="don't try to ignore dubious heterozygote genotypes during phasing" />
                    </when>
                    <when value="no"/>
                </conditional>
            </when>
        </conditional>

        <!-- variant calling model; the available alpha options differ between the maruki models and snp -->
        <conditional name="model_cond">
            <param argument="--model" type="select" label="model to use to call variants and genotypes">
                <option value="marukilow" selected="true">marukilow</option>
                <option value="marukihigh">marukihigh</option>
                <option value="snp">snp</option>
            </param>
            <when value="marukilow">
                <expand macro="variant_calling_options_vg"/>
            </when>
            <when value="marukihigh">
                <expand macro="variant_calling_options_vg"/>
            </when>
            <when value="snp">
                <expand macro="variant_calling_options_g"/>
            </when>
        </conditional>

        <expand macro="in_log"/>
    </inputs>

    <outputs>
        <expand macro="out_log"/>
        <!-- gstacks is run with "-O stacks_outputs", so the distribs file has to be collected from that directory -->
        <data format="txt" name="distribs" label="${tool.name} on ${on_string} distribs" from_work_dir="stacks_outputs/gstacks.log.distribs" />
        <expand macro="gstacks_outputs_macro"/>
    </outputs>

    <tests>
        <!-- denovomode, w popmap -->
        <test>
            <param name="input_type|input_type_selector" value="list"/>
            <param name="input_type|input_bam">
                <collection type="list">
                    <element name="PopA_01.matches" ftype="bam" value="tsv2bam/PopA_01.matches.bam" />
                    <element name="PopA_02.matches" ftype="bam" value="tsv2bam/PopA_02.matches.bam" />
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
            <conditional name="mode_cond">
                <param name="mode_select" value="denovo"/>
            </conditional>
            <param name="add_log" value="yes" />
            <output name="output_log" ftype="txt" file="gstacks/gstacks.log" lines_diff="8"/>
            <output_collection name="gstacks_out" type="list" count="2">
                <element name="catalog.calls.vcf" file="gstacks/catalog.calls.tsv" ftype="vcf" lines_diff="2"/>
                <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/>
            </output_collection>
        </test>
        <!-- denovomode, wo popmap (allows for only one input), ignore PE, advanced, alt model -->
        <test>
            <param name="input_type|input_type_selector" value="manual"/>
            <param name="input_type|input_bam" value="tsv2bam/PopA_01.matches.bam" ftype="bam"/>
            <conditional name="mode_cond">
                <param name="mode_select" value="denovo" />
                <param name="ignore_pe_reads" value="--ignore-pe-reads" />
                <conditional name="advanced_cond">
                    <param name="advanced_select" value="yes"/>
                    <param name="kmer_length" value="23" />
                    <param name="max_debruijn_reads" value="666"/>
                    <param name="min_kmer_cov" value="3" />
                </conditional>
            </conditional>
            <conditional name="model_cond">
                <param name="model" value="marukihigh"/>
                <param name="var_alpha" value="0.1" />
                <param name="gt_alpha" value="0.1" />
            </conditional>
            <param name="add_log" value="yes" />
            <assert_command>
                <has_text text="--ignore-pe-reads" />
                <!-- the de novo branch of the command offers no PCR duplicate removal switch -->
                <not_has_text text="--rm-pcr-duplicates" />
                <has_text text="--kmer-length 23" />
                <has_text text="--max-debruijn-reads 666" />
                <has_text text="--min-kmer-cov 3" />
                <has_text text="--model marukihigh" />
                <has_text text="--var-alpha 0.1" />
                <has_text text="--gt-alpha 0.1" />
            </assert_command>
            <output name="output_log"><assert_contents><has_text text="gstacks is done." /></assert_contents></output>
            <output_collection name="gstacks_out" type="list" count="2"/>
        </test>
        <!-- refbased wo popmap, paired options, removing all unpaired reads results in an error -->
        <test expect_failure="true" expect_exit_code="1">
            <param name="input_type|input_type_selector" value="manual"/>
            <param name="input_type|input_bam" value="tsv2bam/PopA_01.matches.bam,tsv2bam/PopA_02.matches.bam"/>
            <conditional name="mode_cond">
                <param name="mode_select" value="refbased"/>
                <conditional name="paired_cond">
                    <param name="paired_select" value=""/>
                    <!-- rm_unpaired_reads is intentionally left unset: enabling it removes too much of the test data and gstacks fails -->
                    <param name="rm_pcr_duplicates" value="--rm-pcr-duplicates" />
                </conditional>
            </conditional>
            <param name="add_log" value="yes" />
            <assert_command>
                <!-- no popmap: the command uses the bam list instead of the input directory -->
                <not_has_text text="-I bam_inputs" />
                <has_text text="-B " />
                <not_has_text text="--rm-unpaired-reads" />
                <has_text text="--rm-pcr-duplicates" />
            </assert_command>
        </test>
        <!-- refbased w popmap (here bam names need to be equal to sample names in popmap), unpaired, advanced, snp model -->
        <test>
            <param name="input_type|input_type_selector" value="list"/>
            <param name="input_type|input_bam">
                <collection type="list">
                    <element name="PopA_01" ftype="bam" value="tsv2bam/PopA_01.matches.bam" />
                    <element name="PopA_02" ftype="bam" value="tsv2bam/PopA_02.matches.bam" />
                </collection>
            </param>
            <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
            <conditional name="mode_cond">
                <param name="mode_select" value="refbased"/>
                <conditional name="paired_cond">
                    <param name="paired_select" value="--unpaired"/>
                </conditional>
                <conditional name="advanced_cond">
                    <param name="advanced_select" value="yes" />
                    <param name="min_mapq" value="23" />
                    <param name="max_clipped" value="0.23" />
                    <param name="max_insert_len" value="666" />
                    <param name="details" value="--details"/>
                    <param name="phasing_cooccurrences_thr_min" value="2"/>
                    <param name="phasing_cooccurrences_thr_max" value="3"/>
                    <param name="phasing_dont_prune_hets" value="--phasing-dont-prune-hets" />
                </conditional>
            </conditional>
            <param name="model_cond|model" value="snp"/>
            <param name="model_cond|gt_alpha" value="0.1" />
            <param name="add_log" value="yes" />
            <assert_command>
                <!-- with a popmap the command reads the input directory and passes no bam list -->
                <has_text text="-I bam_inputs" />
                <not_has_text text="-B " />
                <has_text text="--unpaired" />
                <has_text text="--min-mapq 23" />
                <has_text text="--max-clipped 0.23" />
                <has_text text="--max-insert-len 666" />
                <has_text text="--details" />
                <has_text text="--phasing-cooccurrences-thr-range 2,3" />
                <has_text text="--phasing-dont-prune-hets" />
                <has_text text="--model snp" />
                <has_text text="--gt-alpha 0.1" />
            </assert_command>
            <output name="output_log"><assert_contents><has_text text="gstacks is done." /></assert_contents></output>
            <output_collection name="gstacks_out" type="list" count="2"/>
        </test>
        <!-- refbased wo popmap (here bam names don't matter), ignore PE reads -->
        <test>
            <param name="input_type|input_type_selector" value="list"/>
            <param name="input_type|input_bam">
                <collection type="list">
                    <element name="PopA_01.matches" ftype="bam" value="tsv2bam/PopA_01.matches.bam" />
                    <element name="PopA_02.matches" ftype="bam" value="tsv2bam/PopA_02.matches.bam" />
                </collection>
            </param>
            <conditional name="mode_cond">
                <param name="mode_select" value="refbased"/>
                <conditional name="paired_cond">
                    <param name="paired_select" value="--ignore-pe-reads"/>
                </conditional>
            </conditional>
            <param name="add_log" value="yes" />
            <assert_command>
                <!-- no popmap: the command uses the bam list instead of the input directory -->
                <not_has_text text="-I bam_inputs" />
                <has_text text="-B " />
                <has_text text="--ignore-pe-reads" />
            </assert_command>
            <output name="output_log"><assert_contents><has_text text="gstacks is done." /></assert_contents></output>
            <output_collection name="gstacks_out" type="list" count="2">
                <element name="catalog.calls.vcf" file="gstacks/catalog.calls.tsv" ftype="vcf" lines_diff="2"/>
                <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz" compare="sim_size"/>
            </output_collection>
        </test>
    </tests>

    <help>
<![CDATA[
.. class:: infomark

**What it does**

For de novo analyses, this program will pull in paired-end reads, if available,
assemble the paired-end contig and merge it with the single-end locus, align
reads to the locus, and call SNPs.

For reference-aligned analyses, this program will build loci from the single
and/or paired-end reads before calling SNPs. The single- and paired-end reads
must be aligned and stored together in the input BAM or SAM files and the
reads must be sorted. The gstacks program will detect if single- or paired-end
reads are present.

In either mode, gstacks is able to remove PCR duplicates if requested.

--------

**Input files**

If a population map is given, BAM records must be assigned to samples using BAM "read groups"
(gstacks uses the ID/identifier and SM/sample name fields). Read groups
must be consistent if repeated in different files.
Otherwise read groups are unneeded and ignored.

**Output files**

TODO

@STACKS_INFOS@
]]>
    </help>
    <expand macro="citation" />
</tool>