Mercurial > repos > mvdbeek > yaha
diff yaha.xml @ 0:0c888a0686bb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yaha commit 15b31d03f0dbc59ec544d4ce5837ff03b6936c27-dirty
author | mvdbeek |
---|---|
date | Thu, 29 Dec 2016 14:51:49 -0500 |
parents | |
children | 584220a3c520 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/yaha.xml Thu Dec 29 14:51:49 2016 -0500
@@ -0,0 +1,260 @@
+<tool id="yaha" name="yaha" version="0.1.83">
+    <!-- Galaxy wrapper for yaha: indexes a FASTA reference, aligns FASTQ reads against it
+         and pipes the SAM output through sambamba to produce a sorted BAM dataset. -->
+    <description>fast and flexible long-read alignment with optimal breakpoint detection</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.1.83">yaha</requirement>
+        <requirement type="package" version="0.6.5">sambamba</requirement>
+    </requirements>
+    <version_command><![CDATA[yaha 2>&1| head -n1]]></version_command>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Stage the reference as genome.fa and build the yaha index in the job directory.
+        ## Each source is compared explicitly: in `x == "history" or "reference"` the bare
+        ## string operand is always true, so that form of the guard could never fail.
+        #if $reference_genome.reference_genome_source == "history" or $reference_genome.reference_genome_source == "reference":
+            #if $reference_genome.reference_genome_source == "history":
+                ln -f -s '$reference_genome.history_item' genome.fa &&
+            #elif $reference_genome.reference_genome_source == "reference":
+                ## Link the FASTA file, not the select value; assumes fasta_indexes exposes a "path" column (confirm in the data table conf).
+                ln -f -s '$reference_genome.fasta_item.fields.path' genome.fa &&
+            #end if
+            yaha -g genome.fa -H $reference_genome.maxHitsIndex -L $reference_genome.wordLenIndex -S $reference_genome.skipDistanceIndex &&
+            ## Rebuild the index file name given to -x from the same three settings: word length and
+            ## skip distance zero-padded to 2 digits, maxHits to 5 (presumably the name yaha -g writes).
+            #set maxHitsIndex = "%s%s" % ("0" * (5 - len(str($reference_genome.maxHitsIndex))), $reference_genome.maxHitsIndex)
+            #set skipdist = $reference_genome.skipDistanceIndex if len(str($reference_genome.skipDistanceIndex)) > 1 else "0%s" % $reference_genome.skipDistanceIndex
+            #set wordLenIndex = $reference_genome.wordLenIndex if len(str($reference_genome.wordLenIndex)) > 1 else "0%s" % $reference_genome.wordLenIndex
+            #set index_path = "genome.X%s_%s_%sS" % ($wordLenIndex, $skipdist, $maxHitsIndex)
+        #else:
+            #pass ## augment with pre-built index if considered useful
+        #end if
+        yaha
+        -x '$index_path'
+        -q '$q'
+        $outformat stdout
+        -t \${GALAXY_SLOTS:-1}
+        -BW $BW
+        -G $G
+        -H $H
+        -M $M
+        -MD $MD
+        -P $P
+        -X $X
+        #if $ags.use_ags == "yes":
+            -AGS Y
+            -GEC $ags.GEC
+            -GOC $ags.GOC
+            -MS $ags.MS
+            -RC $ags.RC
+        #else:
+            -AGS N
+        #end if
+        #if $oqc.use_oqc == "yes":
+            -OQC Y
+            -BP $oqc.BP
+            -MGDP $oqc.MGDP
+            -MNO $oqc.MNO
+        #else:
+            -OQC N
+        #end if
+        #if $fbs.use_fbs == "yes":
+            -FBS Y
+            -PRL $fbs.PRL
+            -PSS $fbs.PSS
+        #else:
+            -FBS N
+        #end if
+        | sambamba view -S -f bam /dev/stdin | sambamba sort -o '$alignment' -l 6 -t \${GALAXY_SLOTS:-1} /dev/stdin
+    ]]></command>
+    <inputs>
+        <param type="data" argument="-q" label="Fastq reads to align" format="fastqsanger" />
+        <conditional name="reference_genome">
+            <param help="Built-in references were created using default options" label="Source for the reference genome to align against" name="reference_genome_source" type="select">
+                <!-- Write a datamanager if prebuilt genomes are important
+                <option selected="True" value="indexed">Use a built-in index</option>
+                -->
+                <option value="history">Use a genome from history to build an index</option>
+                <option value="reference">Use a built-in genome to build an index</option>
+            </param>
+            <!-- Not selectable while the "indexed" option above stays commented out; it reads the yaha_indexes data table. -->
+            <when value="indexed">
+                <param help="If your genome of interest is not listed, contact the Galaxy team" label="Select a reference genome" name="index" type="select">
+                    <options from_data_table="yaha_indexes">
+                        <filter column="2" type="sort_by" />
+                        <validator message="No genomes are available for the selected input dataset" type="no_options" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param format="fasta" label="Select the reference genome" name="history_item" type="data" />
+                <expand macro="index_parameter"/>
+            </when>
+            <when value="reference">
+                <param label="Select a reference genome" name="fasta_item" type="select">
+                    <options from_data_table="fasta_indexes">
+                        <filter column="2" type="sort_by"/>
+                    </options>
+                </param>
+                <expand macro="index_parameter"/>
+            </when>
+        </conditional>
+        <param name="outformat" type="select" label="Produce alignment with softclipping?">
+            <option value="-osh">Produce alignment with hard clipping</option>
+            <option value="-oss">Produce alignment with soft clipping</option>
+        </param>
+        <param type="integer" argument="-BW" value="5" min="0" label="BandWidth"
+               help="band size on each side of the diagonal of banded Smith Waterman" />
+        <param type="integer" argument="-G" value="50" min="0" label="maxGap" help="maximum indel size allowed with a single alignment" />
+        <param type="integer" argument="-H" value="650" min="1" max="65525" label="maxHits" help="maximum times a seed is in the reference before it is ignored as too repetitive. To take advantage of k-mer sampling, use the same value of maxHits during index creation and alignment." />
+        <param type="integer" argument="-M" value="25" min="0" label="minMatch" help="minimum number of bases in seeds to start an alignment" />
+        <param type="integer" argument="-MD" value="50" min="0" label="MaxDesert" help="maximum number of contiguous bases without a seed before alignment is split" />
+        <param type="float" argument="-P" value="0.9" min="0" label="minPercent-identity" help="minimum matching/alignment-length for a query to be included in output" />
+        <param type="integer" argument="-X" value="25" min="0" label="Xdropoff" help="maximum score dropoff before terminating alignment extensions" />
+        <conditional name="ags">
+            <param name="use_ags" type="select" label="Use Affine Gap Scoring?">
+                <option value="yes" selected="True">Yes</option>
+                <option value="no">No</option>
+            </param>
+            <when value="yes">
+                <param argument="-GEC" type="integer" value="2" min="0" label="GapExtensionCost" help="cost for extending a gap (indel)"/>
+                <param argument="-GOC" type="integer" value="5" min="0" label="GapOpenCost" help="cost for starting a new gap (indel)"/>
+                <param argument="-MS" type="integer" value="1" min="0" label="MatchScore" help="score added for each matching base"/>
+                <param argument="-RC" type="integer" value="3" min="0" label="ReplacementCost" help="score subtracted for each mismatched base"/>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+        <conditional name="oqc">
+            <param name="use_oqc" type="select" label="Use Optimal Query Coverage Algorithm?"
+                   help="">
+                <option value="yes" selected="True">Yes (Find a set of alignments that optimally cover the query, using the remaining options)</option>
+                <option value="no">No (Output all alignments meeting above criteria)</option>
+            </param>
+            <when value="yes">
+                <param argument="-BP" type="integer" value="5" min="0" label="BreakpointPenalty" help="penalty for inserting a breakpoint in split-read alignment"/>
+                <param argument="-MGDP" type="integer" value="5" min="0" label="MaxGenomicDistancePenalty"/>
+                <param argument="-MNO" type="integer" value="25" min="0" label="MinNonOverlap" help="minimum number of unshared bases required in each split alignment"/>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+        <conditional name="fbs">
+            <!-- NOTE(review): no option is marked selected="True", so Galaxy presumably falls back to the first option ("yes"); confirm that default is intended. -->
+            <param name="use_fbs" type="select" label="Use Filter By Similarity Algorithm?" help="">
+                <option value="yes" selected="False">Yes (Output alignments similar to best alignment found using OQC.)</option>
+                <option value="no">No</option>
+            </param>
+            <when value="yes">
+                <param argument="-PRL" type="float" value="0.9" min="0" max="1" label="PercentReciprocalLength" help="minimum ratio of overlapping length between similar alignment"/>
+                <param argument="-PSS" type="float" value="0.9" min="0" max="1" label="PercentSimilarScore" help="minimum ratio of scores between similar alignments"/>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="alignment" format="bam" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="q" value="input.fastq" ftype="fastqsanger"/>
+            <param name="reference_genome_source" value="history"/>
+            <param name="history_item" value="phiX.fa" ftype="fasta"/>
+            <output name="alignment" value="alignment.bam" ftype="bam"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Summary
+-------
+
+*yaha* is an open source, flexible, sensitive and accurate DNA aligner
+designed for single-end reads. It supports three major modes of
+operation:
+
+1. The default “Optimal Query Coverage” (**-OQC**) mode reports the best
+   set of alignments that cover the length of each query.
+2. Using “Filter By Similarity” (**-FBS**), along with the best set of
+   alignments, *yaha* will also output alignments that are highly
+   similar to an alignment in the best set.
+3. Finally, *yaha* can output all the alignments found for each query.
+
+The **-OQC** and **-FBS** modes are specifically tuned to form split
+read mappings that can be used to accurately identify structural
+variation events (deletions, duplications, insertions or inversions)
+between the subject query and the reference genome.
+
+Usage
+-----
+
+**OPTIONS:** Default values enclosed in square brackets []
+
+::
+
+    Input/Output Options:
+        -g FILE    input genome file to use during index creation (FASTA or nib2)
+        -q FILE    input file of sequence reads to align (FASTA or FASTQ) [STDIN]
+        -osh FILE  output file for alignment output in SAM format with hard clipping(default) [STDOUT]
+        -oss FILE  output file for alignment output in SAM format with soft clipping [STDOUT]
+        -x FILE    reference index file to use during alignment
+        NOTE: At most one of -osh or -oss should be specified.
+
+    Index Creation Options:
+        -H INT     maxHits: During index creation, seeds occurring more than maxHits times will be sampled [65565]
+        -L INT     seedLength: Length of seed to use. During alignment, seed length is taken from index file [15]
+        -S INT     Skipdistance: Number of bases to skip ahead before forming next seed [1]
+
+    General Alignment Options:
+        -BW INT    BandWidth: band size on each side of the diagonal of banded Smith Waterman [5]
+        -G INT     maxGap: maximum indel size allowed with a single alignment [50]
+        -H INT     maxHits: maximum times a seed is in the reference before it is ignored as too repetitive [650]
+        -M INT     minMatch: minimum number of bases in seeds to start an alignment [25]
+        -MD INT    MaxDesert: maximum number of contiguous bases without a seed before alignment is split [50]
+        -P REAL    minPercent-identity: minimum matching/alignment-length for a query to be included in output [0.9]
+        -X INT     Xdropoff: maximum score dropoff before terminating alignment extensions [25]
+        -t INT     numThreads: number of threads used to parallel process reads [1]
+
+    Affine Gap Scoring Options:
+        If -AGS is off, a simple edit distance calculation is done.
+        If on, the remaining options are used:
+        -AGS BOOL  (Y|N) controls use of Affine Gap Scoring [Y].
+        -GEC INT   GapExtensionCost: cost for extending a gap (indel) [2]
+        -GOC INT   GapOpenCost: cost for starting a new gap (indel) [5]
+        -MS INT    MatchScore: score added for each matching base [1]
+        -RC INT    ReplacementCost: score subtracted for each mismatched base [3]
+
+    Optimal Query Coverage Options:
+        If -OQC is off, all alignments meeting above criteria are output.
+        If -OQC is on, a set of alignments are found that optimally cover the query, using the remaining options.
+        -OQC BOOL  (Y|N) controls use of the Optimal Query Coverage Algorithm.
+        -BP INT    BreakpointPenalty: penalty for inserting a breakpoint in split-read alignment [5]
+        -MGDP INT  MaxGenomicDistancePenalty [5]
+        -MNO INT   MinNonOverlap: minimum number of unshared bases required in each split alignment [minMatch]
+        NOTE: The total cost of adding a breakpoint in a split-read mapping is:
+              BP*MIN(MGDP, Log10(genomic distance between reference loci))
+
+    Filter By Similarity Options:
+        If -FBS is on, the remaining options are used. An alignment must satisfy BOTH criteria to be "similar".
+        -FBS BOOL  (Y|N) controls output of alignments similar to best alignment found using OQC.
+        -PRL REAL  PercentReciprocalLength: minimum ratio of overlapping length between similar alignments [0.9]
+        -PSS REAL  PercentSimilarScore: minimum ratio of scores between similar alignments [0.9]
+
+See the `User Guide <https://www.dropbox.com/s/7j758vpbaskcq20/YAHA_User_Guide.0.1.83.pdf?dl=0>`__
+for more details on all options and their usage.
+
+| **Written by:** Greg Faust (gf4ea@virginia.edu)
+| `Ira Hall Lab, University of
+  Virginia <http://faculty.virginia.edu/irahall/>`__
+
+| **Please cite:**
+| `Faust G.G. and Hall I.M., "*YAHA*: fast and flexible long-read
+  alignment with optimal breakpoint detection," *Bioinformatics* Oct.
+  2012; **28**\ (19):
+  2417-2424. <http://bioinformatics.oxfordjournals.org/content/28/19/2417>`__
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/bts456</citation>
+    </citations>
+</tool>