Mercurial > repos > mvdbeek > yaha
changeset 0:0c888a0686bb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yaha commit 15b31d03f0dbc59ec544d4ce5837ff03b6936c27-dirty
author | mvdbeek |
---|---|
date | Thu, 29 Dec 2016 14:51:49 -0500 |
parents | |
children | 584220a3c520 |
files | all_fasta.loc.sample macros.xml test-data/alignment.bam test-data/input.fastq test-data/phiX.fa tool_data_table_conf.xml.sample yaha.xml |
diffstat | 7 files changed, 403 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Thu Dec 29 14:51:49 2016 -0500 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Dec 29 14:51:49 2016 -0500
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<macros>
+    <!--
+        index_parameter: the three integer options (-H, -L and -S) that are
+        handed to yaha when it creates an index. yaha.xml expands this macro
+        in every reference_genome branch that builds an index.
+    -->
+    <xml name="index_parameter">
+        <param name="maxHitsIndex" argument="-H" type="integer" min="1" max="65525" value="65525" label="maxHits" help="K-mers that appear > maxHits times in the genome will be randomly sampled down to max-hits locations. To take advantage of k-mer sampling, use the same value of maxHits during index creation and alignment."/>
+        <param name="wordLenIndex" argument="-L" type="integer" min="1" max="15" value="15" label="wordLen" help="the size of k-mers used for seed hits"/>
+        <!-- The form only bounds this value by 15 (the largest wordLen); it does not compare it with the wordLen actually chosen. -->
+        <param name="skipDistanceIndex" argument="-S" type="integer" min="1" max="15" value="1" label="Skip-distance" help="the distance to skip ahead in the genome before forming the next k-mer to index. Skip-distance must be between 1 and wordLen (inclusive)"/>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fastq Thu Dec 29 14:51:49 2016 -0500 @@ -0,0 +1,40 @@ +@phiX174_1980_2501_0:1:0_3:0:0_0/1 +TTAGGTGTGTGTAAAACAGGTGCCGAAGAAGCTGGATTAACAGAATTGAGAACCAGCTTATCAGAAAAAA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1542_1965_0:0:0_0:0:0_1/1 +CTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2950_3377_0:0:0_2:0:0_2/1 +CTCAAATCCGGCGTCAACCATACCAGCATAGGAAGCATCAGCACCAGCACGCTCCCAAGCATTAATCTCA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2259_2739_1:0:0_1:0:0_3/1 +CTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAAAT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1141_1609_1:0:0_1:0:0_4/1 +TGGCGCTCTCCGTCTTTCTCCATTTCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_185_708_0:0:0_1:0:0_5/1 +CCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1363_1914_3:0:0_0:0:0_6/1 +GCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGC ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_3199_3732_0:0:0_1:0:0_7/1 +CTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_36_572_1:0:0_0:0:0_8/1 +ACCATAAACGCAAGCCTCAACGCAGCGACGAGCACGAGAGCGGTCAGTAGCAATCCAAACTTTGTTACTC ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2128_2577_0:0:0_4:0:0_9/1 +TTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAG ++ +2222222222222222222222222222222222222222222222222222222222222222222222
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX.fa Thu Dec 29 14:51:49 2016 -0500 @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC 
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC 
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Dec 29 14:51:49 2016 -0500
@@ -0,0 +1,11 @@
+<!-- If you don't want to update your loc files as changed in revision 4550:535d276c92bc, use the file tool_data_table_conf.xml.oldlocstyle instead of this one. -->
+<tables>
+    <!-- all_fasta: one row per FASTA file found under the genome directory.
+         The four columns correspond to the tab-separated fields described in
+         all_fasta.loc.sample: unique build id, dbkey, display name, file path. -->
+    <table comment_char="#" allow_duplicate_entries="False" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc"/>
+    </table>
+</tables>
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/yaha.xml Thu Dec 29 14:51:49 2016 -0500
@@ -0,0 +1,256 @@
+<tool id="yaha" name="yaha" version="0.1.83">
+    <description>fast and flexible long-read alignment with optimal breakpoint detection</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.1.83">yaha</requirement>
+        <requirement type="package" version="0.6.5">sambamba</requirement>
+    </requirements>
+    <version_command><![CDATA[yaha 2>&1| head -n1]]></version_command>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Step 1: link the chosen FASTA as genome.fa and build a yaha index from it.
+        ## Both selectable sources ("history" and "reference") take this branch.
+        #if str($reference_genome.reference_genome_source) in ("history", "reference"):
+            #if str($reference_genome.reference_genome_source) == "history":
+                ln -f -s '$reference_genome.history_item' genome.fa &&
+            #elif str($reference_genome.reference_genome_source) == "reference":
+                ## The select itself renders the table's value (id) column, so read the FASTA location from the path column.
+                ln -f -s '$reference_genome.fasta_item.fields.path' genome.fa &&
+            #end if
+            yaha -g genome.fa -H $reference_genome.maxHitsIndex -L $reference_genome.wordLenIndex -S $reference_genome.skipDistanceIndex &&
+            ## Compose the index file name given to -x below from the zero-padded index options (maxHits to 5 digits, wordLen and skip distance to 2), e.g. genome.X15_01_65525S for the default values.
+            #set maxHitsIndex = "%s%s" % ("0" * (5 - len(str($reference_genome.maxHitsIndex))), $reference_genome.maxHitsIndex)
+            #set skipdist = $reference_genome.skipDistanceIndex if len(str($reference_genome.skipDistanceIndex)) > 1 else "0%s" % $reference_genome.skipDistanceIndex
+            #set wordLenIndex = $reference_genome.wordLenIndex if len(str($reference_genome.wordLenIndex)) > 1 else "0%s" % $reference_genome.wordLenIndex
+            #set index_path = "genome.X%s_%s_%sS" % ($wordLenIndex, $skipdist, $maxHitsIndex)
+        #else:
+            #pass ## augment with pre-built index if considered useful
+        #end if
+        ## Step 2: align the reads, writing SAM to stdout, then convert the stream to BAM and sort it with sambamba.
+        yaha
+            -x '$index_path'
+            -q '$q'
+            $outformat stdout
+            -t \${GALAXY_SLOTS:-1}
+            -BW $BW
+            -G $G
+            -H $H
+            -M $M
+            -MD $MD
+            -P $P
+            -X $X
+            #if $ags.use_ags == "yes":
+                -AGS Y
+                -GEC $ags.GEC
+                -GOC $ags.GOC
+                -MS $ags.MS
+                -RC $ags.RC
+            #else:
+                -AGS N
+            #end if
+            #if $oqc.use_oqc == "yes":
+                -OQC Y
+                -BP $oqc.BP
+                -MGDP $oqc.MGDP
+                -MNO $oqc.MNO
+            #else:
+                -OQC N
+            #end if
+            #if $fbs.use_fbs == "yes":
+                -FBS Y
+                -PRL $fbs.PRL
+                -PSS $fbs.PSS
+            #else:
+                -FBS N
+            #end if
+        | sambamba view -S -f bam /dev/stdin | sambamba sort -o '$alignment' -l 6 -t \${GALAXY_SLOTS:-1} /dev/stdin
+    ]]></command>
+    <inputs>
+        <param type="data" argument="-q" label="Fastq reads to align" format="fastqsanger" />
+        <conditional name="reference_genome">
+            <param help="Built-in references were created using default options" label="Source for the reference genome to align against" name="reference_genome_source" type="select">
+                <!-- Write a datamanager if prebuilt genomes are important
+                <option selected="True" value="indexed">Use a built-in index</option>
+                -->
+                <option value="history">Use a genome from history to build an index</option>
+                <option value="reference">Use a built-in genome to build an index</option>
+            </param>
+            <!-- Not selectable at present: the "indexed" option above is commented out and no yaha_indexes data table ships with this tool. -->
+            <when value="indexed">
+                <param help="If your genome of interest is not listed, contact the Galaxy team" label="Select a reference genome" name="index" type="select">
+                    <options from_data_table="yaha_indexes">
+                        <filter column="2" type="sort_by" />
+                        <validator message="No genomes are available for the selected input dataset" type="no_options" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param format="fasta" label="Select the reference genome" name="history_item" type="data" />
+                <expand macro="index_parameter"/>
+            </when>
+            <when value="reference">
+                <!-- Uses the all_fasta table declared in tool_data_table_conf.xml.sample; the command reads its path column. -->
+                <param label="Select a reference genome" name="fasta_item" type="select">
+                    <options from_data_table="all_fasta">
+                        <filter column="2" type="sort_by"/>
+                    </options>
+                </param>
+                <expand macro="index_parameter"/>
+            </when>
+        </conditional>
+        <param name="outformat" type="select" label="Produce alignment with softclipping?">
+            <option value="-osh">Produce alignment with hard clipping</option>
+            <option value="-oss">Produce alignment with soft clipping</option>
+        </param>
+        <param type="integer" argument="-BW" value="5" min="0" label="BandWidth" help="band size on each side of the diagonal of banded Smith Waterman" />
+        <param type="integer" argument="-G" value="50" min="0" label="maxGap" help="maximum indel size allowed with a single alignment" />
+        <param type="integer" argument="-H" value="650" min="1" max="65525" label="maxHits" help="maximum times a seed is in the reference before it is ignored as too repetitive. To take advantage of k-mer sampling, use the same value of maxHits during index creation and alignment." />
+        <param type="integer" argument="-M" value="25" min="0" label="minMatch" help="minimum number of bases in seeds to start an alignment" />
+        <param type="integer" argument="-MD" value="50" min="0" label="MaxDesert" help="maximum number of contiguous bases without a seed before alignment is split" />
+        <param type="float" argument="-P" value="0.9" min="0" label="minPercent-identity" help="minimum matching/alignment-length for a query to be included in output" />
+        <param type="integer" argument="-X" value="25" min="0" label="Xdropoff" help="maximum score dropoff before terminating alignment extensions" />
+        <conditional name="ags">
+            <param name="use_ags" type="select" label="Use Affine Gap Scoring?">
+                <option value="yes" selected="True">Yes</option>
+                <option value="no">No</option>
+            </param>
+            <when value="yes">
+                <param argument="-GEC" type="integer" value="2" min="0" label="GapExtensionCost" help="cost for extending a gap (indel)"/>
+                <param argument="-GOC" type="integer" value="5" min="0" label="GapOpenCost" help="cost for starting a new gap (indel)"/>
+                <param argument="-MS" type="integer" value="1" min="0" label="MatchScore" help="score added for each matching base"/>
+                <param argument="-RC" type="integer" value="3" min="0" label="ReplacementCost" help="score subtracted for each mismatched base"/>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+        <conditional name="oqc">
+            <param name="use_oqc" type="select" label="Use Optimal Query Coverage Algorithm?" help="">
+                <option value="yes" selected="True">Yes (Find a set of alignments that optimally cover the query, using the remaining options)</option>
+                <option value="no">No (Output all alignments meeting above criteria)</option>
+            </param>
+            <when value="yes">
+                <param argument="-BP" type="integer" value="5" min="0" label="BreakpointPenalty" help="penalty for inserting a breakpoint in split-read alignment"/>
+                <param argument="-MGDP" type="integer" value="5" min="0" label="MaxGenomicDistancePenalty"/>
+                <param argument="-MNO" type="integer" value="25" min="0" label="MinNonOverlap" help="minimum number of unshared bases required in each split alignment"/>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+        <conditional name="fbs">
+            <!-- NOTE(review): no option below is marked selected="True"; presumably Galaxy then preselects the first one ("yes"). Confirm that default is intended before changing it, since the expected test output depends on it. -->
+            <param name="use_fbs" type="select" label="Use Filter By Similarity Algorithm?" help="">
+                <option value="yes" selected="False">Yes (Output alignments similar to best alignment found using OQC.)</option>
+                <option value="no">No</option>
+            </param>
+            <when value="yes">
+                <param argument="-PRL" type="float" value="0.9" min="0" max="1" label="PercentReciprocalLength" help="minimum ratio of overlapping length between similar alignment"/>
+                <param argument="-PSS" type="float" value="0.9" min="0" max="1" label="PercentSimilarScore" help="minimum ratio of scores between similar alignments"/>
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="alignment" format="bam" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="q" value="input.fastq" ftype="fastqsanger"/>
+            <param name="reference_genome_source" value="history"/>
+            <param name="history_item" value="phiX.fa" ftype="fasta"/>
+            <output name="alignment" value="alignment.bam" ftype="bam"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Summary
+-------
+
+*yaha* is an open source, flexible, sensitive and accurate DNA aligner
+designed for single-end reads. It supports three major modes of
+operation:
+
+1. The default “Optimal Query Coverage” (**-OQC**) mode reports the best
+   set of alignments that cover the length of each query.
+2. Using “Filter By Similarity” (**-FBS**), along with the best set of
+   alignments, *yaha* will also output alignments that are highly
+   similar to an alignment in the best set.
+3. Finally, *yaha* can output all the alignments found for each query.
+
+The **-OQC** and **-FBS** modes are specifically tuned to form split
+read mappings that can be used to accurately identify structural
+variation events (deletions, duplications, insertions or inversions)
+between the subject query and the reference genome.
+
+Usage
+-----
+
+**OPTIONS:** Default values enclosed in square brackets []
+
+::
+
+    Input/Output Options:
+        -g FILE    input genome file to use during index creation (FASTA or nib2)
+        -q FILE    input file of sequence reads to align (FASTA or FASTQ) [STDIN]
+        -osh FILE  output file for alignment output in SAM format with hard clipping(default) [STDOUT]
+        -oss FILE  output file for alignment output in SAM format with soft clipping [STDOUT]
+        -x FILE    reference index file to use during alignment
+        NOTE: At most one of -osh or -oss should be specified.
+
+    Index Creation Options:
+        -H INT     maxHits: During index creation, seeds occurring more than maxHits times will be sampled [65525]
+        -L INT     seedLength: Length of seed to use. During alignment, seed length is taken from index file [15]
+        -S INT     Skipdistance: Number of bases to skip ahead before forming next seed [1]
+
+    General Alignment Options:
+        -BW INT    BandWidth: band size on each side of the diagonal of banded Smith Waterman [5]
+        -G INT     maxGap: maximum indel size allowed with a single alignment [50]
+        -H INT     maxHits: maximum times a seed is in the reference before it is ignored as too repetitive [650]
+        -M INT     minMatch: minimum number of bases in seeds to start an alignment [25]
+        -MD INT    MaxDesert: maximum number of contiguous bases without a seed before alignment is split [50]
+        -P REAL    minPercent-identity: minimum matching/alignment-length for a query to be included in output [0.9]
+        -X INT     Xdropoff: maximum score dropoff before terminating alignment extensions [25]
+        -t INT     numThreads: number of threads used to parallel process reads [1]
+
+    Affine Gap Scoring Options:
+        If -AGS is off, a simple edit distance calculation is done.
+        If on, the remaining options are used:
+        -AGS BOOL  (Y|N) controls use of Affine Gap Scoring [Y].
+        -GEC INT   GapExtensionCost: cost for extending a gap (indel) [2]
+        -GOC INT   GapOpenCost: cost for starting a new gap (indel) [5]
+        -MS INT    MatchScore: score added for each matching base [1]
+        -RC INT    ReplacementCost: score subtracted for each mismatched base [3]
+
+    Optimal Query Coverage Options:
+        If -OQC is off, all alignments meeting above criteria are output.
+        If -OQC is on, a set of alignments are found that optimally cover the query, using the remaining options.
+        -OQC BOOL  (Y|N) controls use of the Optimal Query Coverage Algorithm.
+        -BP INT    BreakpointPenalty: penalty for inserting a breakpoint in split-read alignment [5]
+        -MGDP INT  MaxGenomicDistancePenalty [5]
+        -MNO INT   MinNonOverlap: minimum number of unshared bases required in each split alignment [minMatch]
+        NOTE: The total cost of adding a breakpoint in a split-read mapping is:
+            BP*MIN(MGDP, Log10(genomic distance between reference loci))
+
+    Filter By Similarity Options:
+        If -FBS is on, the remaining options are used. An alignment must satisfy BOTH criteria to be "similar".
+        -FBS BOOL  (Y|N) controls output of alignments similar to best alignment found using OQC.
+        -PRL REAL  PercentReciprocalLength: minimum ratio of overlapping length between similar alignments [0.9]
+        -PSS REAL  PercentSimilarScore: minimum ratio of scores between similar alignments [0.9]
+
+See the `User Guide <https://www.dropbox.com/s/7j758vpbaskcq20/YAHA_User_Guide.0.1.83.pdf?dl=0>`__
+for more details on all options and their usage.
+
+| **Written by:** Greg Faust (gf4ea@virginia.edu)
+| `Ira Hall Lab, University of
+  Virginia <http://faculty.virginia.edu/irahall/>`__
+
+| **Please cite:**
+| `Faust G.G. and Hall I.M., "*YAHA*: fast and flexible long-read
+  alignment with optimal breakpoint detection," *Bioinformatics* Oct.
+  2012; **28**\ (19):
+  2417-2424. <http://bioinformatics.oxfordjournals.org/content/28/19/2417>`__
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/bts456</citation>
+    </citations>
+</tool>