comparison sortmerna.xml @ 15:baab049d3aff draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit e20e66ff81239452b3d75dec16e9e0cc8eb46266-dirty
author bebatut
date Wed, 10 Feb 2016 04:02:18 -0500
parents 4016c1db6886
children 1a4662c2d6db
comparison
equal deleted inserted replaced
14:011ec5258e71 15:baab049d3aff
1 <tool id="sortmerna" name="SortMeRNA" version="0.1.0"> 1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.1.0">
2 <description>to filter ribosomal RNAs in metatranscriptomic data</description> 2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
3
4 <requirements> 3 <requirements>
5 <requirement type="package" version="2.0">sortmerna</requirement> 4 <requirement type="package" version="2.0">sortmerna</requirement>
6 </requirements> 5 </requirements>
7
8 <stdio> 6 <stdio>
9 <exit_code range="1:" /> 7 <regex match="This program builds a Burst trie on an input rRNA database"
8 source="both"
9 level="fatal"
10 description="Buildtrie program failed to execute." />
11 <regex match="The database name"
12 source="both"
13 level="fatal"
14 description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." />
15 <regex match="ERROR"
16 source="both"
17 level="fatal"
18 description="ERROR" />
10 </stdio> 19 </stdio>
11
12 <version_command> 20 <version_command>
13 <![CDATA[ 21 <![CDATA[
14 \${SORTMERNADIR}/sortmerna --version 2>&1|grep 'SortMeRNA version' 22 sortmerna --version 2>&1|grep 'SortMeRNA version'
15 ]]> 23 ]]>
16 </version_command> 24 </version_command>
17
18 <command> 25 <command>
19 <![CDATA[ 26 <![CDATA[
20 #set $ref = '' 27 #set $ref = ''
21 #set $sep='' 28 #set $sep=''
22 29 #if str( $databases_type.databases_selector ) == 'history'
23 #if str( $databases.databases_selector ) == 'history' 30 #for $db in $databases_type.database_name
24 #for $db in $databases.databases_name
25 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] 31 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0]
26 #set $sep = ':' 32 #set $sep = ':'
27 #end for 33 #end for
28 #else 34 #else if str( $databases_type.databases_selector ) == 'cached_to_index'
29 ## databases path is not directly accessible, must match by hand with LOC file contents 35 ## databases path is not directly accessible, must match by hand with LOC file contents
30 #set $data_table = dict([(_[0], _[2]) for _ in $databases.databases_input.input.options.tool_data_table.data]) 36 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
31 #for $db in $databases.databases_input.value 37 #for $db in $databases_type.input_databases.value
38 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] + '-reindexed'
39 #set $sep = ':'
40 #end for
41 #else:
42 ## databases path is not directly accessible, must match by hand with LOC file contents
43 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
44 #for $db in $databases_type.input_databases.value
32 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] 45 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0]
33 #set $sep = ':' 46 #set $sep = ':'
34 #end for 47 #end for
35 #end if 48 #end if
36 49
37 \${SORTMERNADIR}/indexdb_rna --ref $ref -L $seed_length --max_pos $max_pos 50 #if str( $databases_type.databases_selector ) != 'cached':
38 51 indexdb_rna
39 && 52 --ref $ref
40 53 -L $databases_type.seed_length
41 \${SORTMERNADIR}/sortmerna 54 --max_pos $databases_type.max_pos
42 --ref $ref 55 &&
43 --reads $input_sequence_file 56 #end if
44 --aligned aligned 57
45 58 sortmerna
46 $fastx.fastx_test 59 --ref $ref
47 #if $fastx.fastx_test == '--fastx' 60 --reads $input_reads
48 #if $fastx.fastx_rejected 61 --aligned aligned
49 --other other_file 62
50 #end if 63 #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
51 #end if 64 $sequencing_type.paired_type
52 65 #end if
53 $sam.sam_test 66
54 #if $sam.sam_test == '--sam' 67 $strand_search
55 $sam.sam_sq_tag 68 $aligned_fastx.aligned_fastx_selector
56 #end if 69 #if $aligned_fastx.aligned_fastx_selector == '--fastx'
57 70 #if $aligned_fastx.other
58 $blast_format 71 --other other_file
59 72 #end if
60 $log 73 #end if
74 $aligned_sam.aligned_sam_selector
75 #if $aligned_sam.aligned_sam_selector == '--sam'
76 $aligned_sam.sq
77 #end if
78 $aligned_blast
79
80 $log
61 81
62 #if $report.report_type == 'best' 82 #if $report.report_type == 'best'
63 #if $report.report_best.report_best_type == '0' 83 #if $report.report_best.report_best_type == '1'
64 --best 0
65 #else if $report.report_best.report_best_type == '1'
66 --best 1 84 --best 1
67 --min_lis $report.report_best.report_best_min_lis 85 --min_lis $report.report_best.report_best_min_lis
68 #else 86 #else
69 --best $report.report_best.report_best_value 87 --best $report.report_best.report_best_value
70 --min_list $report.report_best.report_best_min_lis 88 --min_lis $report.report_best.report_best_min_lis
71 #end if 89 #end if
72 #else 90 #else
73 #if $report.report_num_alignments.report_num_alignments_type == '0' 91 #if $report.report_num_alignments.report_num_alignments_type == 'other_value'
74 --num_alignments 0 92 --num_alignments $report.report_num_alignments.report_num_alignments_value
75 #else if $report.report_num_alignments.report_num_alignments_type == '1'
76 --num_alignments 1
77 #else 93 #else
78 --num_alignments $report.report_num_alignments.report_num_alignments_value 94 --num_alignments $report.report_num_alignments.report_num_alignments_type
79 #end if 95 #end if
80 #end if 96 #end if
81 97
82 -e $e_value 98 -e $e_value
83 --match $match 99 --match $match
84 --mismatch $mismatch 100 --mismatch $mismatch
85 --gap_open $gap_open 101 --gap_open $gap_open
86 --gap_ext $gap_ext 102 --gap_ext $gap_ext
87 -N $ambiguous_letter 103 -N $ambiguous_letter
88 104 -a \${GALAXY_SLOTS:-1}
89 #if $strand == 'forward'
90 -F
91 #end if
92 #if $strand == 'reverse'
93 -R
94 #end if
95 ]]> 105 ]]>
96 </command> 106 </command>
97
98 <inputs> 107 <inputs>
99 <param name="input_sequence_file" type="data" format="fastq,fasta" label="Input sequence file" help=""/> 108 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/>
100 109 <conditional name="sequencing_type">
101 <conditional name="databases"> 110 <param name="sequencing_type_selector" type="select" label="Sequencing type">
102 <param name="databases_selector" type="select" label="Databases to query" help=""> 111 <option value="not_paired">Reads are not paired</option>
103 <option value="cached" selected="true">Public ribosomal databases</option> 112 <option value="paired">Reads are paired</option>
113 </param>
114 <when value="not_paired" />
115 <when value="paired">
116 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not">
117 <option value="">leave the reads split between aligned and rejected files</option>
118 <option value="--paired-in">output both reads to aligned file (--paired-in)</option>
119 <option value="--paired-out">output both reads to rejected file (--paired-out)</option>
120 </param>
121 </when>
122 </conditional>
123
124 <param name="strand_search" type="select" label="Which strands to search">
125 <option value="">Search both strands</option>
126 <option value="-F">Search only the forward strand (-F)</option>
127 <option value="-R">Search only the reverse-complementary strand (-R)</option>
128 </param>
129
130 <conditional name="databases_type">
131 <param name="databases_selector" type="select" label="Databases to query"
132 help="Public rRNA databases provided with SortMeRNA have been indexed.
133 On the contrary, personal databases must be indexed each time SortMeRNA is launched.
134 Please be patient, this may take some time depending on the size of the given database.">
135 <option value="cached" selected="true">Public pre-indexed ribosomal databases</option>
136 <option value="cached_to_index">Public ribosomal databases to index with non default parameters</option>
104 <option value="history">Databases from your history</option> 137 <option value="history">Databases from your history</option>
105 </param> 138 </param>
106 <when value="cached"> 139 <when value="cached">
107 <param name="databases_input" label="rRNA databases" type="select" display="checkboxes" multiple="true"> 140 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
108 <options from_data_table="sortmerna_rRNA_databases" /> 141 <options from_data_table="rRNA_databases" />
109 <validator type="no_options" message="Select at least one database"/> 142 <validator type="no_options" message="Select at least one database"/>
110 </param> 143 </param>
111 </when> 144 </when>
145 <when value="cached_to_index">
146 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
147 <options from_data_table="rRNA_databases" />
148 <validator type="no_options" message="Select at least one database"/>
149 </param>
150 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/>
151 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/>
152 </when>
112 <when value="history"> 153 <when value="history">
113 <param name="databases_name" type="data" format="fasta" multiple="true" label="rRNA databases" 154 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases"
114 help=""/> 155 help="Your databases will be indexed first, which may take up to several minutes."/>
115 </when> 156 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/>
116 </conditional> 157 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/>
117 158 </when>
118 <conditional name="fastx"> 159 </conditional>
119 <param name="fastx_test" type='select' label="Output into Fasta/FastQ file?" help=""> 160
120 <option value="--fastx">Yes</option> 161 <!-- Outputs -->
162 <conditional name="aligned_fastx">
163 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format?">
164 <option value="--fastx">Yes (--fastx)</option>
121 <option value="">No</option> 165 <option value="">No</option>
122 </param> 166 </param>
123 <when value="--fastx"> 167 <when value="--fastx">
124 <param name='fastx_rejected' type='boolean' checked="true" label="Conserve rejected reads?" help=""/> 168 <param name="other" type="boolean" label="Include rejected reads file?" help="(--other)" />
125 </when> 169 </when>
126 </conditional> 170 <when value="" />
127 171 </conditional>
128 <conditional name="sam"> 172 <conditional name="aligned_sam">
129 <param name="sam_test" type='select' label="Output SAM alignments?" help=""> 173 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format?">
130 <option value="--sam">Yes</option> 174 <option value="--sam">Yes (--sam)</option>
131 <option value="">No</option> 175 <option value="">No</option>
132 </param> 176 </param>
133 <when value="--sam"> 177 <when value="--sam">
134 <param name='sam_sq_tag' type='boolean' checked="true" truevalue="--SQ" falsevalue="" label="Add SQ tags to SAM file?" help=""/> 178 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" />
135 </when> 179 </when>
136 </conditional> 180 <when value="" />
137 181 </conditional>
138 <param name="blast_format" type="select" display="radio" label="Format for BLAST output" help=""> 182 <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format">
139 <option value="--blast 0">Pairwise</option> 183 <option value="--blast 0">pairwise (--blast 0)</option>
140 <option value="--blast 1">Tabular (Blast -m 8 format)</option> 184 <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option>
141 <option value="--blast 2'">Tabular + column for CIGAR</option> 185 <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option>
142 <option value="--blast 3" selected="true">Tabular + columns for CIGAR and query coverage</option> 186 <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option>
143 <option value="">No Blast output</option> 187 <option value="" selected="true">No</option>
144 </param> 188 </param>
145 189 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file"
146 <param name='log' type='boolean' checked="true" truevalue="--log" falsevalue="" label="Conserve overall statistic output into a log file?" help=""/> 190 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
147 191 </param>
148 <conditional name="report"> 192 <conditional name="report">
149 <param name="report_type" type="select" display="radio" label="Parameters for filtering and read mapping" help=""> 193 <param name="report_type" type="select" label="Parameters for filtering and read mapping" help="">
150 <option value="best" selected="true">Report best alignments per read reaching E-value</option> 194 <option value="best" selected="true">Report best alignments per read reaching E-value</option>
151 <option value="num_alignments">Report first alignements per read reaching E-value</option> 195 <option value="num_alignments">Report first alignements per read reaching E-value</option>
152 </param> 196 </param>
153 <when value="best"> 197 <when value="best">
154 <conditional name="report_best"> 198 <conditional name="report_best">
155 <param name="report_best_type" type="select" display="radio" label="Number of searched alignments" help="Only the best alignment is reported"> 199 <param name="report_best_type" type="select" label="Number of searched alignments" help="Only the best alignment is reported (--best)">
156 <option value="0">All high-candidate reference sequences are searched for alignments (very slow)</option>
157 <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast). The high-candidate sequences are determined heuristically using a LIS of seed matches)</option> 200 <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast). The high-candidate sequences are determined heuristically using a LIS of seed matches)</option>
158 <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option> 201 <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option>
159 </param> 202 </param>
203 <when value="1">
204 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/>
205 </when>
160 <when value="other_value"> 206 <when value="other_value">
161 <param name="report_best_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/> 207 <param name="report_best_value" type="integer" min="2" max="100" value="2" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/>
162 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment."/> 208 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/>
163 </when>
164 <when value="1">
165 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment."/>
166 </when> 209 </when>
167 </conditional> 210 </conditional>
168 </when> 211 </when>
169 <when value="num_alignments"> 212 <when value="num_alignments">
170 <conditional name="report_num_alignments"> 213 <conditional name="report_num_alignments">
171 <param name="report_num_alignments_type" type="select" display="radio" label="Number of output alignments" help=""> 214 <param name="report_num_alignments_type" type="select" label="Number of output alignments" help="(--num_alignments)">
172 <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option> 215 <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option>
173 <option value="1" selected="true">The first alignment passing E-value threshold are reported (very fast, best choice if only filtering is needed)</option> 216 <option value="1" selected="true">The first alignment passing E-value threshold are reported (very fast, best choice if only filtering is needed)</option>
174 <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option> 217 <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option>
175 </param> 218 </param>
219 <when value="0" />
220 <when value="1" />
176 <when value="other_value"> 221 <when value="other_value">
177 <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/> 222 <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/>
178 </when> 223 </when>
179 </conditional> 224 </conditional>
180 </when> 225 </when>
181 </conditional> 226 </conditional>
182 227
183 <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help=""/> 228 <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help="(-e)"/>
184 <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help=""/> 229 <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help="(--match)"/>
185 <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help=""/> 230 <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help="(--mismatch)"/>
186 <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help=""/> 231 <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help="(--gap_open)"/>
187 <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help=""/> 232 <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help="(--gap_ext)"/>
188 <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help=""/> 233 <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help="(-N)"/>
189
190 <param name="strand" type="select" display="radio" label="Search on" help="">
191 <option value="both" selected="true">Both strands</option>
192 <option value="forward" >Only forward strand</option>
193 <option value="reverse" >Only reverse-complementary strand</option>
194 </param>
195
196 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help=""/>
197 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored"/>
198 </inputs> 234 </inputs>
199
200 <outputs> 235 <outputs>
201 <data format_source="input_sequence_file" name="aligned_sequence_file" 236 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat"
202 metadata="input_sequence_file" from_work_dir="aligned.dat" 237 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})">
203 label="Aligned sequences on ${on_string} (SortMeRNA)"> 238 <filter>aligned_fastx['aligned_fastx_selector']</filter>
204 <filter>((fastx['fastx_test']))</filter> 239 </data>
205 </data> 240 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat"
206 241 label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})">
207 <data format_source="input_sequence_file" name="rejected_sequence_file" 242 <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter>
208 metadata="input_sequence_file" from_work_dir="other_file.dat" 243 </data>
209 label="Rejected sequences on ${on_string} (SortMeRNA)"> 244 <data format="sam" name="output_sam" from_work_dir="aligned.sam"
210 <filter>((fastx['fastx_test'] and fastx['fastx_rejected']))</filter> 245 label="Alignments on ${on_string} (SAM)">
211 </data> 246 <filter>aligned_sam['aligned_sam_selector']</filter>
212 247 </data>
213 <data format="sam" name="sam_alignment_file" metadata="input_sequence_file" 248 <data format="tabular" name="output_blast" from_work_dir="aligned.blast"
214 from_work_dir="aligned.sam" 249 label="Alignments on ${on_string} (BLAST)">
215 label="SAM alignments on ${on_string} (SortMeRNA)"> 250 <filter>aligned_blast</filter>
216 <filter>((sam['sam_test']]))</filter>
217 </data>
218
219 <data format="tabular" name="blast_output_file"
220 metadata="input_sequence_file" from_work_dir="aligned.blast"
221 label="Blast alignments on ${on_string} (SortMeRNA)">
222 <filter>blast_format</filter>
223 <change_format> 251 <change_format>
224 <when input="blast_format" value="--blast 0" format="txt" /> 252 <when input="aligned_blast" value="--blast 0" format="txt" />
225 </change_format> 253 </change_format>
226 </data> 254 </data>
227 255 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log">
228 <data format="txt" name="output_log" metadata="input_sequence_file"
229 from_work_dir="aligned.log" label="Log on ${on_string} (SortMeRNA)">
230 <filter>log</filter> 256 <filter>log</filter>
231 </data> 257 </data>
232 </outputs> 258 </outputs>
233
234 <tests> 259 <tests>
235 <test> 260 <test>
236 <param name="input_sequence_file" value="sortmerna_input_sequences.fastq" ftype="fastq"/> 261 <param name="input_reads" value="read_small.fastq" />
262 <param name="sequencing_type_selector" value="not_paired" />
263 <param name="strand_search" value="" />
237 <param name="databases_selector" value="history" /> 264 <param name="databases_selector" value="history" />
238 <param name="databases_name" value="sortmerna_db.fasta" ftype="fasta"/> 265 <param name="database_name" value="ref_small.fasta" />
239 <param name="fastx_test" value="--fastx" /> 266 <param name="other" value="True" />
240 <param name='fastx_rejected' value="True"/> 267 <param name="log" value="" />
241 <param name="sam_test" value="" /> 268 <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" />
242 <param name="blast_format" value="--blast 3" /> 269 <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
243 <param name='log' value="" /> 270 <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" />
244 <param name="report_type" value="best" /> 271 </test>
245 <param name="report_best_type" value="1" /> 272 <test>
246 <param name="report_best_min_lis" value="2" /> 273 <param name="input_reads" value="read_small.fasta" />
247 <param name="e_value" value="1" /> 274 <param name="sequencing_type_selector" value="not_paired" />
248 <param name="match" value="2" /> 275 <param name="strand_search" value="" />
249 <param name="mismatch" value="-3" /> 276 <param name="databases_selector" value="history" />
250 <param name="gap_open" value="5" /> 277 <param name="database_name" value="ref_small.fasta" />
251 <param name="gap_ext" value="2" /> 278 <param name="other" value="True" />
252 <param name="ambiguous_letter" value="-3" /> 279 <param name="log" value="" />
253 <param name="strand" value="both" /> 280 <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" />
254 <param name="seed_length" value="18" /> 281 <output name="output_other" file="sortmerna_wrapper_other2.fasta" />
255 <param name="max_pos" value="10000" /> 282 <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" />
256
257 <output name="aligned_sequence_file" file="sortmerna_aligned_sequences.fastq" ftype="fastq"/>
258 <output name="rejected_sequence_file" file="sortmerna_rejected_sequences.fastq" ftype="fastq"/>
259 <output name="blast_output_file" file="sortmerna_blast_output.tabular" ftype="tabular"/>
260 </test> 283 </test>
261 </tests> 284 </tests>
262 285 <help>
263 <help><![CDATA[ 286 <![CDATA[
264
265 **What it does** 287 **What it does**
266 288
267 SortMeRNA is a tool for RNA filtering based on local sequence alignment against 289 SortMeRNA_ is a software designed to rapidly filter ribosomal RNA fragments
268 rRNA database. For more information, check the `user manual <http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf>`_. 290 from metatranscriptomic data produced by next-generation sequencers.
269 291 It is capable of handling large RNA databases and sorting out all fragments
270 ----- 292 matching to the database with high accuracy and specificity.
293
294 .. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/
295
271 296
272 **Input** 297 **Input**
273 298
274 The input is a sequence file in fasta or fastq and databases to search against. 299 The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against.
275 These databases have to be indexed before the sequence alignment. 300 If the user has two forward-reverse paired-sequencing reads files, they may use
276 301 the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order.
277 SortMeRNA is distributed with 8 rRNA databases constructed from SILVA SSU,LSU 302
278 (version 111) and the RFAM 5/5.8S (version 11.0) databases: 303 If the sequencing type for the reads is paired-end, the user has two options under
279 304 "Sequencing type" to filter the reads and preserve their order in the file.
280 - SILVA 16S bacteria 305 For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_.
281 - SILVA 16S archaea 306
282 - SILVA 18S eukarya 307 .. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf
283 - SILVA 23S bacteria 308
284 - SILVA 23s archaea 309
285 - SILVA 28S eukarya 310 **Output**
286 - Rfam 5S archaea/bacteria 311
287 - Rfam 5.8S eukarya 312 The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistics file for the rRNA content of reads, as well as the rRNA subunit distribution, can be generated.
288 313
289 These databases are available as public ribosomal databases. But local databases 314
290 can also be used. 315 **rRNA databases**
291 316
292 ----- 317 SortMeRNA is distributed with 8 representative rRNA databases, which were
293 318 all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S
294 **Parameters** 319 (version 11.0) databases using the tool UCLUST.
295 320
296 The database index can be modulated by: 321 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
297 322 | Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) |
298 - Seed length 323 +==========================+======+=============+===================+========================+===================+
299 - Maximum number of positions to store for each k-mer for database indexing 324 | SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 |
300 325 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
301 For RNA sorting, the parameters are: 326 | SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 |
302 327 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
303 - Test to output files in fasta or fastq, in sam and/or in blast format 328 | SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 |
304 - Test for conservation of rejected sequences 329 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
305 - Choice in blast format 330 | SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 |
306 - Test to add SQ tags in sam file 331 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
307 - Filtering and read mapping parameters 332 | SILVA 23s archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 |
308 - Test for conservation of best alignment or first alignment 333 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
309 - Number of searched, conserved alignments 334 | SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 |
310 - E-value threshold 335 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
311 - SW score for a match, for a mismatch, for introducing a gap, for extending a gap, for ambigous letters 336 | Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 |
312 - Strand to search 337 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
313 338 | Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 |
314 ----- 339 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
315 340
316 **Outputs** 341 id %: members of the cluster must have at least 'id %' identity with the representative sequence
317 342
318 Given the choosen parameters, several outputs are possible 343 average id %: average identity of a cluster member to the representative sequence
319 344
320 - Sequence file in fasta or fastq with aligned sequences (or conserved) 345 The user may also choose to use their own rRNA databases.
321 - Sequence file in fasta or fastq with rejected sequences 346
322 - File with sam alignments 347 .. class:: warningmark
323 - File with blast outputs 348
324 349 Note that your personal databases are indexed each time. The public ribosomal
325 350 databases are indexed when added, but they can be re-indexed with non-default indexing
326 ]]></help> 351 parameters. The indexing may take some time depending on the size of the given database.
327 352
353 ]]>
354 </help>
355
328 <citations> 356 <citations>
329 <citation type="doi">10.1093/bioinformatics/bts611</citation> 357 <citation type="doi">10.1093/bioinformatics/bts611</citation>
358 <citation type="doi">10.1093/nar/gks1219</citation>
359 <citation type="doi">10.1093/nar/gks1005</citation>
360 <citation type="doi">10.1093/bioinformatics/btq461</citation>
361 <citation type="doi">10.1038/nbt.2198</citation>
330 </citations> 362 </citations>
331 </tool> 363 </tool>