comparison segemehl.xml @ 3:e1d38fef6dd5 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/segemehl commit 21aaee40723b5341b4236edeb0e72995c2054053
author bgruening
date Fri, 16 Dec 2016 06:25:17 -0500
parents dc63d59e7bf8
children 9ffdddb42700
comparison
equal deleted inserted replaced
2:dc63d59e7bf8 3:e1d38fef6dd5
1 <tool id="segemehl" name="segemehl" version="0.1.6.0"> 1 <tool id="segemehl" name="segemehl" version="0.2.0">
2 <description>based short read aligner</description> 2 <description>based short read aligner</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.1.6">segemehl</requirement> 4 <requirement type="package" version="0.2.0">segemehl</requirement>
5 </requirements> 5 </requirements>
6 <stdio>
7 <regex match="Exit forced"
8 source="both"
9 level="fatal"
10 description="Execution halted." />
11 </stdio>
6 <command> 12 <command>
13 <![CDATA[
7 ## prepare segemehl index if no reference genome is supplied 14 ## prepare segemehl index if no reference genome is supplied
8 temp_index = `mktemp`;
9 #if $refGenomeSource.genomeSource == "history": 15 #if $refGenomeSource.genomeSource == "history":
10 segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome; 16 mkdir ./temp_index/ &&
17 #set $temp_index = './temp_index/temp.idx'
18 segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome &&
11 #else: 19 #else:
12 $temp_index = ${refGenomeSource.index.fields.index_path} 20 #set $temp_index = $refGenomeSource.index.fields.index_path
13 #end if 21 #end if
14 22
15 23
16 ## execute segemehl 24 ## execute segemehl
17 segemehl.x 25 segemehl.x
18 26
19 ## number of threads 27 ## number of threads
20 -t "\${GALAXY_SLOTS:-12}" 28 -t "\${GALAXY_SLOTS:-12}"
21 29
22 ## db file path 30 #if $refGenomeSource.genomeSource == "history":
23 -d ${refGenomeSource.index.fields.db_path} 31 -d $refGenomeSource.own_reference_genome
32 #else:
33 -d ${refGenomeSource.index.fields.db_path}
34 #end if
24 35
25 -i $temp_index 36 -i $temp_index
26 37
27 ## check for single/pair-end 38 ## check for single/pair-end
28 #if str( $library.type ) == "single": 39 #if str( $library.type ) == "single":
29 #set $query_list = list() 40 #set $query_list = list()
30 ## prepare inputs 41 ## prepare inputs
31 #for $fastq in $library.reads: 42 #for $fastq in $library.input_query:
32 $query_list.append('%s' %($fastq.input_query)) 43 $query_list.append('%s' % $fastq )
33 #end for 44 #end for
34 -q "#echo ' '.join( $query_list )#" 45 -q "#echo ' '.join( $query_list )#"
35 #else 46 #else
36 ## prepare inputs 47 ## prepare inputs
37 48
38 #set $mate1 = list() 49 #set $mate1 = list()
39 #set $mate2 = list() 50 #set $mate2 = list()
40 #for $mate_pair in $library.mate_list: 51 #for $mate_pair in $library.mate_list:
41 $mate1.append( str($mate_pair.first_strand_query) ) 52 $mate1.append( str($mate_pair.first_strand_query) )
42 $mate2.append( str($mate_pair.second_strand_query) ) 53 $mate2.append( str($mate_pair.second_strand_query) )
49 #end if 60 #end if
50 -m $minsize 61 -m $minsize
51 -A $accuracy 62 -A $accuracy
52 -H $hitstrategy 63 -H $hitstrategy
53 #if str( $prime5 ).strip(): 64 #if str( $prime5 ).strip():
54 -P $prime5 65 -P "$prime5"
55 #end if 66 #end if
56 #if str( $prime3 ).strip(): 67 #if str( $prime3 ).strip():
57 -Q $prime3 68 -Q "$prime3"
58 #end if 69 #end if
59 $polyA 70 $polyA
60 $autoclip 71 $autoclip
61 $hardclip 72 $hardclip
62 $order 73 $order
74 $splits
75 #if $maxout:
76 --maxout $maxout
77 #end if
63 -s 78 -s
64 -o $segemehl_out 79
80 --minsplicecover $minsplicecover
81 --minfragscore $minfragscore
82 --minfraglen $minfraglen
83 --splicescorescale $splicescorescale
84
85 -o '$segemehl_out'
86 ]]>
65 </command> 87 </command>
66 <stdio>
67 <regex match="Exit forced"
68 source="both"
69 level="fatal"
70 description="Execution halted." />
71 </stdio>
72 <inputs> 88 <inputs>
73
74 <conditional name="refGenomeSource"> 89 <conditional name="refGenomeSource">
75 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 90 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
76 <option value="indexed">Use a built-in index</option> 91 <option value="indexed">Use a built-in index</option>
77 <option value="history">Use one from the history</option> 92 <option value="history">Use one from the history</option>
78 </param> 93 </param>
88 <validator type="no_options" message="No indexes are available for the selected input dataset"/> 103 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
89 </options> 104 </options>
90 </param> 105 </param>
91 </when> <!-- build-in --> 106 </when> <!-- build-in -->
92 <when value="history"> 107 <when value="history">
93 <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> 108 <param name="own_reference_genome" type="data" format="fasta" label="Select the reference genome" />
94 </when> <!-- history --> 109 </when> <!-- history -->
95 </conditional> <!-- refGenomeSource --> 110 </conditional> <!-- refGenomeSource -->
96
97 111
98 <conditional name="library"> 112 <conditional name="library">
99 <param name="type" type="select" label="Is this library paired-end?"> 113 <param name="type" type="select" label="Is this library paired-end?">
100 <option value="single">Single-end</option> 114 <option value="single">Single-end</option>
101 <option value="paired">Paired-end</option> 115 <option value="paired">Paired-end</option>
102 </param> 116 </param>
103 <when value="single"> 117 <when value="single">
104 <repeat name="reads" title="FASTQ/FASTA files"> 118 <param name="input_query" type="data" multiple="True" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads in FASTQ/FASTA files" />
105 <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
106 </repeat>
107 </when> 119 </when>
108 <when value="paired"> 120 <when value="paired">
121 <!-- ToDo paired collections -->
109 <repeat name="mate_list" title="Paired End Pairs" min="1"> 122 <repeat name="mate_list" title="Paired End Pairs" min="1">
110 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" /> 123 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
111 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" /> 124 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
112 </repeat> 125 </repeat>
113 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" /> 126 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
114 </when> 127 </when>
115 </conditional> 128 </conditional>
116 129
130 <param name="minsplicecover" type="integer" value="80" label="Min coverage for spliced transcripts" help="(--minsplicecover)" />
131 <param name="minfragscore" type="integer" value="18" label="Min score of a spliced fragment" help="(--minfragscore)" />
132 <param name="minfraglen" type="integer" value="20" label="Min length of a spliced fragment" help="(--minfraglen)" />
133 <param name="splicescorescale" type="float" value="1.0" label="Report spliced alignment with score greater than this scale times the score"
134 help="Report only if this value x score is larger than next best spliced alignment (--splicescorescale)" />
117 135
118 <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)"> 136 <param name="minsize" type="integer" value="12" min="1" label="Minimum size of queries" help="(-m)" />
119 <validator type="in_range" min="1"/> 137
120 </param> 138 <param name="maxout" type="integer" min="0" value="0" optional="True"
121 <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" > 139 label="Maximum number of alignments that will be reported" help="(--maxout)" />
122 <validator type="in_range" min="1" max="100"/> 140 <param name="accuracy" type="integer" value="85" min="1" max="100" label="Min percentage of matches per read in semi-global alignment" help="(-A)" />
123 </param> 141
124 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)"> 142 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
125 <option value="1">report only best scoring hits</option> 143 <option value="1">report only best scoring hits</option>
126 <option value="0">report all scoring hits</option> 144 <option value="0">report all scoring hits</option>
127 </param> 145 </param>
128 <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" /> 146 <param name="prime5" type="text" label="add 5' adapter" help="default: none (-P)" />
129 <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/> 147 <param name="prime3" type="text" label="add 3' adapter" help="default: none (-Q)"/>
130 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/> 148 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
131 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/> 149 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
132 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/> 150 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
133 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sort the output by chromosome and position" help="(-O)"/> 151 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sort the output by chromosome and position" help="(-O)"/>
152 <param name="splits" type="boolean" truevalue="--splits" falsevalue="" checked="false" label="Detect split/spliced reads" help="(--splits)"/>
134 </inputs> 153 </inputs>
135
136 <outputs> 154 <outputs>
137 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/> 155 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
138 </outputs> 156 </outputs>
157 <tests>
158 <test>
159 <param name="genomeSource" value="history" />
160 <param name="own_reference_genome" value="chr1.fa" />
161 <param name="library" value="single" />
162 <param name="input_query" value="test.fastq" />
163 <param name="splits" value="true" />
164 <output name="segemehl_out" file="testmap.sam" lines_diff="2" />
165 </test>
166 </tests>
139 <help> 167 <help>
168 <![CDATA[
140 169
141 .. class:: infomark 170 .. class:: infomark
142 171
143 **What it does** 172 **What it does**
144 173
145 Segemehl_ is a short read mapper with gaps. 174 Segemehl_ is a short read mapper with gaps.
146 175
147 Segemehl_ is a software to map short sequencer reads to reference genomes. 176 Segemehl_ is a software to map short sequencer reads to reference genomes.
148 Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. 177 Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
149 Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly. 178 Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
150 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping. 179 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
151 180
152 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/ 181 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
153 182
154 **References**
155 183
156 Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502 184 ]]>
157 download latest version: 0.1.6 manual: download here new stuff: faster multiple split read mapping bug fixes: bugfixes: increased sensitivity for strand switches changes: - default accuracy now 90% older segemehl indices are still usable. issues: untraceable errors with gcc compiler gcc-4.5. zlib linker problems with some ubuntu versions complaint department: steve bioinf uni leipzig deshapeimage_1_link_0shapeimage_1_link_1
158
159 </help> 185 </help>
186 <citations>
187 <citation type="doi">10.1371/journal.pcbi.1000502</citation>
188 </citations>
160 </tool> 189 </tool>