Mercurial > repos > mvdbeek > yaha
comparison yaha.xml @ 0:0c888a0686bb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yaha commit 15b31d03f0dbc59ec544d4ce5837ff03b6936c27-dirty
author | mvdbeek |
---|---|
date | Thu, 29 Dec 2016 14:51:49 -0500 |
parents | |
children | 584220a3c520 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0c888a0686bb |
---|---|
1 <tool id="yaha" name="yaha" version="0.1.83"> | |
2 <description>fast and flexible long-read alignment with optimal breakpoint detection</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <requirements> | |
7 <requirement type="package" version="0.1.83">yaha</requirement> | |
8 <requirement type="package" version="0.6.5">sambamba</requirement> | |
9 </requirements> | |
10 <version_command><![CDATA[yaha 2>&1| head -n1]]></version_command> | |
11 <command detect_errors="aggressive"><![CDATA[ | |
12 #if $reference_genome.reference_genome_source == "history" or $reference_genome.reference_genome_source == "reference": | |
13 #if $reference_genome.reference_genome_source == "history": | |
14 ln -f -s "$reference_genome.history_item" genome.fa && ## stage the history FASTA under a fixed name | |
15 #elif $reference_genome.reference_genome_source == "reference": | |
16 ln -f -s "$reference_genome.fasta_item.fields.path" genome.fa && ## link the FASTA path column of the selected data table entry, not the entry's value | |
17 #end if | |
18 yaha -g genome.fa -H $reference_genome.maxHitsIndex -L $reference_genome.wordLenIndex -S $reference_genome.skipDistanceIndex && ## build the index; the set-directives below assemble the index file name that is later passed to -x | |
19 #set maxHitsIndex = "%s%s" % ("0" * (5 - len(str($reference_genome.maxHitsIndex))), $reference_genome.maxHitsIndex) | |
20 #set skipdist = $reference_genome.skipDistanceIndex if len(str($reference_genome.skipDistanceIndex)) > 1 else "0%s" % $reference_genome.skipDistanceIndex | |
21 #set wordLenIndex = $reference_genome.wordLenIndex if len(str($reference_genome.wordLenIndex)) > 1 else "0%s" % $reference_genome.wordLenIndex | |
22 #set index_path = "genome.X%s_%s_%sS" % ($wordLenIndex, $skipdist, $maxHitsIndex) | |
23 #else: | |
24 #pass ## augment with pre-built index if considered useful; note that index_path is not set on this branch | |
25 #end if | |
26 yaha ## alignment run against the index named by index_path | |
27 -x '$index_path' | |
28 -q '$q' | |
29 $outformat stdout ## the selected -osh / -oss flag, with stdout as its file argument so the result can be piped | |
30 -t \${GALAXY_SLOTS:-1} ## the shell substitutes 1 when GALAXY_SLOTS is unset or empty | |
31 -BW $BW | |
32 -G $G | |
33 -H $H | |
34 -M $M | |
35 -MD $MD | |
36 -P $P | |
37 -X $X | |
38 #if $ags.use_ags == "yes": | |
39 -AGS Y | |
40 -GEC $ags.GEC | |
41 -GOC $ags.GOC | |
42 -MS $ags.MS | |
43 -RC $ags.RC | |
44 #else: | |
45 -AGS N | |
46 #end if | |
47 #if $oqc.use_oqc == "yes": | |
48 -OQC Y | |
49 -BP $oqc.BP | |
50 -MGDP $oqc.MGDP | |
51 -MNO $oqc.MNO | |
52 #else: | |
53 -OQC N | |
54 #end if | |
55 #if $fbs.use_fbs == "yes": | |
56 -FBS Y | |
57 -PRL $fbs.PRL | |
58 -PSS $fbs.PSS | |
59 #else: | |
60 -FBS N | |
61 #end if | |
62 | sambamba view -S -f bam /dev/stdin | sambamba sort -o '$alignment' -l 6 -t \${GALAXY_SLOTS:-1} /dev/stdin ## convert the piped yaha output to BAM, then sort it into the output dataset | |
63 ]]></command> | |
64 <inputs> | |
65 <param type="data" argument="-q" label="Fastq reads to align" format="fastqsanger" /> <!-- written with its leading dash like the other argument attributes; still referenced as $q in the command --> | |
66 <conditional name="reference_genome"> | |
67 <param help="Built-in references were created using default options" label="Source for the reference genome to align against" name="reference_genome_source" type="select"> | |
68 <!-- Write a datamanager if prebuilt genomes are important. Until the option below is enabled, the when-branch for "indexed" further down cannot be selected. | |
69 <option selected="True" value="indexed">Use a built-in index</option> | |
70 --> | |
71 <option value="history">Use a genome from history to build an index</option> | |
72 <option value="reference">Use a built-in genome to build an index</option> | |
73 </param> | |
74 <when value="indexed"> | |
75 <param help="If your genome of interest is not listed, contact the Galaxy team" label="Select a reference genome" name="index" type="select"> | |
76 <options from_data_table="yaha_indexes"> | |
77 <filter column="2" type="sort_by" /> | |
78 <validator message="No genomes are available for the selected input dataset" type="no_options" /> | |
79 </options> | |
80 </param> | |
81 </when> | |
82 <when value="history"> | |
83 <param format="fasta" label="Select the reference genome" name="history_item" type="data" /> | |
84 <expand macro="index_parameter"/> <!-- NOTE(review): presumably defined in macros.xml and supplying maxHitsIndex, wordLenIndex and skipDistanceIndex, which the command reads; confirm --> | |
85 </when> | |
86 <when value="reference"> | |
87 <param label="Select a reference genome" name="fasta_item" type="select"> | |
88 <options from_data_table="fasta_indexes"> | |
89 <filter column="2" type="sort_by"/> | |
90 </options> | |
91 </param> | |
92 <expand macro="index_parameter"/> | |
93 </when> | |
94 </conditional> | |
95 <param name="outformat" type="select" label="Produce alignment with softclipping?"> <!-- the option values are inserted verbatim into the command as the yaha output flag --> | |
96 <option value="-osh">Produce alignment with hard clipping</option> | |
97 <option value="-oss">Produce alignment with soft clipping</option> | |
98 </param> | |
99 <param type="integer" argument="-BW" value="5" min="0" label="BandWidth" help="band size on each side of the diagonal of banded Smith Waterman" /> | |
100 <param type="integer" argument="-G" value="50" min="0" label="maxGap" help="maximum indel size allowed with a single alignment" /> | |
101 <param type="integer" argument="-H" value="650" min="1" max="65525" label="maxHits" help="maximum times a seed is in the reference before it is ignored as too repetitive. To take advantage of k-mer sampling, use the same value of maxHits during index creation and alignment." /> | |
102 <param type="integer" argument="-M" value="25" min="0" label="minMatch" help="minimum number of bases in seeds to start an alignment" /> | |
103 <param type="integer" argument="-MD" value="50" min="0" label="MaxDesert" help="maximum number of contiguous bases without a seed before alignment is split" /> | |
104 <param type="float" argument="-P" value="0.9" min="0" max="1" label="minPercent-identity" help="minimum matching/alignment-length for a query to be included in output" /> <!-- a ratio, so capped at 1 like -PRL and -PSS --> | |
105 <param type="integer" argument="-X" value="25" min="0" label="Xdropoff" help="maximum score dropoff before terminating alignment extensions" /> | |
106 <conditional name="ags"> <!-- Affine Gap Scoring switch; the command emits -AGS Y plus the four scores below, or -AGS N --> | |
107 <param name="use_ags" type="select" label="Use Affine Gap Scoring?"> | |
108 <option value="yes" selected="True">Yes</option> | |
109 <option value="no">No</option> | |
110 </param> | |
111 <when value="yes"> | |
112 <param argument="-GEC" type="integer" value="2" min="0" label="GapExtensionCost" help="cost for extending a gap (indel)"/> | |
113 <param argument="-GOC" type="integer" value="5" min="0" label="GapOpenCost" help="cost for starting a new gap (indel)"/> | |
114 <param argument="-MS" type="integer" value="1" min="0" label="MatchScore" help="score added for each matching base"/> | |
115 <param argument="-RC" type="integer" value="3" min="0" label="ReplacementCost" help="score subtracted for each mismatched base"/> | |
116 </when> | |
117 <when value="no"> <!-- intentionally empty: no further parameters are passed with -AGS N --> | |
118 </when> | |
119 </conditional> | |
120 <conditional name="oqc"> <!-- Optimal Query Coverage switch; the command emits -OQC Y plus the three parameters below, or -OQC N --> | |
121 <param name="use_oqc" type="select" label="Use Optimal Query Coverage Algorithm?" help=""> | |
122 <option value="yes" selected="True">Yes (Find a set of alignments that optimally cover the query, using the remaining options)</option> | |
123 <option value="no">No (Output all alignments meeting above criteria)</option> | |
124 </param> | |
125 <when value="yes"> | |
126 <param argument="-BP" type="integer" value="5" min="0" label="BreakpointPenalty" help="penalty for inserting a breakpoint in split-read alignment"/> | |
127 <param argument="-MGDP" type="integer" value="5" min="0" label="MaxGenomicDistancePenalty" help="cap on the genomic distance term of the breakpoint cost, which is BP*MIN(MGDP, Log10(genomic distance between reference loci))"/> | |
128 <param argument="-MNO" type="integer" value="25" min="0" label="MinNonOverlap" help="minimum number of unshared bases required in each split alignment"/> | |
129 </when> | |
130 <when value="no"> | |
131 </when> | |
132 </conditional> | |
133 <conditional name="fbs"> <!-- Filter By Similarity switch; the command emits -FBS Y plus the two ratios below, or -FBS N --> | |
134 <param name="use_fbs" type="select" label="Use Filter By Similarity Algorithm?" help=""> | |
135 <option value="yes" selected="False">Yes (Output alignments similar to best alignment found using OQC.)</option> <!-- NOTE(review): no option here is marked selected="True"; confirm which option Galaxy then uses as the default --> | |
136 <option value="no">No</option> | |
137 </param> | |
138 <when value="yes"> | |
139 <param argument="-PRL" type="float" value="0.9" min="0" max="1" label="PercentReciprocalLength" help="minimum ratio of overlapping length between similar alignment"/> | |
140 <param argument="-PSS" type="float" value="0.9" min="0" max="1" label="PercentSimilarScore" help="minimum ratio of scores between similar alignments"/> | |
141 </when> | |
142 <when value="no"> | |
143 </when> | |
144 </conditional> | |
145 </inputs> | |
146 <outputs> | |
147 <data name="alignment" format="bam" /> <!-- written by the final sambamba sort step of the command via -o '$alignment' --> | |
148 </outputs> | |
149 <tests> | |
150 <test> <!-- covers the history-genome path; only the reads, the genome source and the history FASTA are set explicitly --> | |
151 <param name="q" value="input.fastq" ftype="fastqsanger"/> | |
152 <param name="reference_genome_source" value="history"/> | |
153 <param name="history_item" value="phiX.fa" ftype="fasta"/> | |
154 <output name="alignment" value="alignment.bam" ftype="bam"/> | |
155 </test> | |
156 </tests> | |
157 <help><![CDATA[ | |
158 Summary | |
159 ------- | |
160 | |
161 *yaha* is an open source, flexible, sensitive and accurate DNA aligner | |
162 designed for single-end reads. It supports three major modes of | |
163 operation: | |
164 | |
165 1. The default “Optimal Query Coverage” (**-OQC**) mode reports the best | |
166 set of alignments that cover the length of each query. | |
167 2. Using “Filter By Similarity” (**-FBS**), along with the best set of | |
168 alignments, *yaha* will also output alignments that are highly | |
169 similar to an alignment in the best set. | |
170 3. Finally, *yaha* can output all the alignments found for each query. | |
171 | |
172 The **-OQC** and **-FBS** modes are specifically tuned to form split | |
173 read mappings that can be used to accurately identify structural | |
174 variation events (deletions, duplications, insertions or inversions) | |
175 between the subject query and the reference genome. | |
176 | |
177 Usage | |
178 ----- | |
179 | |
180 **OPTIONS:** Default values enclosed in square brackets [] | |
181 | |
182 :: | |
183 | |
184 Input/Output Options: | |
185 -g FILE input genome file to use during index creation (FASTA or nib2) | |
186 -q FILE input file of sequence reads to align (FASTA or FASTQ) [STDIN] | |
187 -osh FILE output file for alignment output in SAM format with hard clipping(default) [STDOUT] | |
188 -oss FILE output file for alignment output in SAM format with soft clipping [STDOUT] | |
189 -x FILE reference index file to use during alignment | |
190 NOTE: At most one of -osh or -oss should be specified. | |
191 | |
192 Index Creation Options: | |
193 -H INT maxHits: During index creation, seeds occurring more than maxHits times will be sampled [65565] | |
194 -L INT seedLength: Length of seed to use. During alignment, seed length is taken from index file [15] | |
195 -S INT Skipdistance: Number of bases to skip ahead before forming next seed [1] | |
196 | |
197 General Alignment Options: | |
198 -BW INT BandWidth: band size on each side of the diagonal of banded Smith Waterman [5] | |
199 -G INT maxGap: maximum indel size allowed with a single alignment [50] | |
200 -H INT maxHits: maximum times a seed is in the reference before it is ignored as too repetitive [650] | |
201 -M INT minMatch: minimum number of bases in seeds to start an alignment [25] | |
202 -MD INT MaxDesert: maximum number of contiguous bases without a seed before alignment is split [50] | |
203 -P REAL minPercent-identity: minimum matching/alignment-length for a query to be included in output [0.9] | |
204 -X INT Xdropoff: maximum score dropoff before terminating alignment extensions [25] | |
205 -t INT numThreads: number of threads used to parallel process reads [1] | |
206 | |
207 Affine Gap Scoring Options: | |
208 If -AGS is off, a simple edit distance calculation is done. | |
209 If on, the remaining options are used: | |
210 -AGS BOOL (Y|N) controls use of Affine Gap Scoring [Y]. | |
211 -GEC INT GapExtensionCost: cost for extending a gap (indel) [2] | |
212 -GOC INT GapOpenCost: cost for starting a new gap (indel) [5] | |
213 -MS INT MatchScore: score added for each matching base [1] | |
214 -RC INT ReplacementCost: score subtracted for each mismatched base [3] | |
215 | |
216 Optimal Query Coverage Options: | |
217 If -OQC is off, all alignments meeting above criteria are output. | |
218 If -OQC is on, a set of alignments are found that optimally cover the query, using the remaining options. | |
219 -OQC BOOL (Y|N) controls use of the Optimal Query Coverage Algorithm. | |
220 -BP INT BreakpointPenalty: penalty for inserting a breakpoint in split-read alignment [5] | |
221 -MGDP INT MaxGenomicDistancePenalty [5] | |
222 -MNO INT MinNonOverlap: minimum number of unshared bases required in each split alignment [minMatch] | |
223 NOTE: The total cost of adding a breakpoint in a split-read mapping is: | |
224 BP*MIN(MGDP, Log10(genomic distance between reference loci)) | |
225 | |
226 Filter By Similarity Options: | |
227 If -FBS is on, the remaining options are used. An alignment must satisfy BOTH criteria to be "similar". | |
228 -FBS BOOL (Y|N) controls output of alignments similar to best alignment found using OQC. | |
229 -PRL REAL PercentReciprocalLength: minimum ratio of overlapping length between similar alignments [0.9] | |
230 -PSS REAL PercentSimilarScore: minimum ratio of scores between similar alignments [0.9] | |
231 | |
232 See the `User Guide <https://www.dropbox.com/s/7j758vpbaskcq20/YAHA_User_Guide.0.1.83.pdf?dl=0>`__ | |
233 for more details on all options and their usage. | |
234 | |
235 | **Written by:** Greg Faust (gf4ea@virginia.edu) | |
236 | `Ira Hall Lab, University of | |
237 Virginia <http://faculty.virginia.edu/irahall/>`__ | |
238 | |
239 | **Please cite:** | |
240 | `Faust G.G. and Hall I.M., "*YAHA*: fast and flexible long-read | |
241 alignment with optimal breakpoint detection," *Bioinformatics* Oct. | |
242 2012; **28**\ (19): | |
243 2417-2424. <http://bioinformatics.oxfordjournals.org/content/28/19/2417>`__ | |
244 ]]></help> | |
245 <citations> | |
246 <citation type="doi">10.1093/bioinformatics/bts456</citation> | |
247 </citations> | |
248 </tool> |