comparison sortmerna.xml @ 3:42509ccf8f59 draft

Uploaded
author iuc
date Tue, 04 Aug 2015 15:14:58 -0400
parents
children e6727cef3083
comparison
equal deleted inserted replaced
2:6f23678fc6e9 3:42509ccf8f59
1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.0.0">
2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
3 <requirements>
4 <requirement type='package' version="2.0">sortmerna</requirement>
5 </requirements>
6 <stdio>
7 <regex match="This program builds a Burst trie on an input rRNA database"
8 source="both"
9 level="fatal"
10 description="Buildtrie program failed to execute." />
11 <regex match="The database name"
12 source="both"
13 level="fatal"
14 description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." />
15 </stdio>
16 <version_command>
17 <![CDATA[
18 sortmerna --version 2>&1|grep 'SortMeRNA version'
19 ]]>
20 </version_command>
21 <command>
22 <![CDATA[
23 #set $ref = ''
24 #set $sep=''
25 #if str( $databases_type.databases_selector ) == 'history':
26 #for $db in $databases_type.database_name
27 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0]
28 #set $sep = ':'
29 #end for
30 indexdb_rna --ref $ref &&
31 #else:
32 ## databases path is not directly accessible, must match by hand with LOC file contents
33 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
34 #for $db in $databases_type.input_databases.value
35 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0]
36 #set $sep = ':'
37 #end for
38 #end if
39 ## '&&' is emitted on the indexdb_rna line above: the cached-database branch runs no indexing step, so a shared '&&' here would start the command line with a dangling operator
40 sortmerna --ref $ref --reads $input_reads --aligned aligned
41 #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
42 $sequencing_type.paired_type
43 #end if
44 $strand_search
45 $aligned_fastx.aligned_fastx_selector
46 #if str( $aligned_fastx.aligned_fastx_selector ) == '--fastx'
47 #if $aligned_fastx.other
48 --other other_file
49 #end if
50 #end if
51 $aligned_sam.aligned_sam_selector
52 #if str( $aligned_sam.aligned_sam_selector ) == '--sam'
53 $aligned_sam.sq
54 #end if
55 $aligned_blast
56 $log
57 -a \${GALAXY_SLOTS:-1}
58 ]]>
59 </command>
60 <inputs>
61 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/>
62 <conditional name="sequencing_type">
63 <param name="sequencing_type_selector" type="select" label="Sequencing type">
64 <option value="not_paired">Reads are not paired</option>
65 <option value="paired">Reads are paired</option>
66 </param>
67 <when value="paired">
68 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not">
69 <option value="">leave the reads split between aligned and rejected files</option>
70 <option value="--paired-in">output both reads to aligned file (--paired-in)</option>
71 <option value="--paired-out">output both reads to rejected file (--paired-out)</option>
72 </param>
73 </when>
74 </conditional>
75
76 <param name="strand_search" type="select" label="Which strands to search" display="radio">
77 <option value="">Search both strands</option>
78 <option value="-F">Search only the forward strand (-F)</option>
79 <option value="-R">Search only the reverse-complementary strand (-R)</option>
80 </param>
81
82 <conditional name="databases_type">
83 <param name="databases_selector" type="select" label="Databases to query"
84 help="Public rRNA databases provided with SortMeRNA have been indexed.
85 On the contrary, personal databases must be indexed each time SortMeRNA is launched.
86 Please be patient, this may take some time depending on the size of the given database.">
87 <option value="cached" selected="true">Public ribosomal databases</option>
88 <option value="history">Databases from your history</option>
89 </param>
90 <when value="cached">
91 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
92 <options from_data_table="rRNA_databases" />
93 <validator type="no_options" message="Select at least one database"/>
94 </param>
95 </when>
96 <when value="history">
97 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases"
98 help="Your databases will be indexed first, which may take up to several minutes."/>
99 </when>
100 </conditional>
101
102 <!-- Outputs -->
103 <conditional name="aligned_fastx">
104 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format">
105 <option value="--fastx">Yes (--fastx)</option>
106 <option value="">No</option>
107 </param>
108 <when value="--fastx">
109 <param name="other" type="boolean" label="Include rejected reads file" help="(--other)" />
110 </when>
111 <when value="" />
112 </conditional>
113 <conditional name="aligned_sam">
114 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format">
115 <option value="--sam">Yes (--sam)</option>
116 <option value="">No</option>
117 </param>
118 <when value="--sam">
119 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" />
120 </when>
121 <when value="" />
122 </conditional>
123 <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format">
124 <option value="--blast 0">pairwise (--blast 0)</option>
125 <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option>
126 <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option>
127 <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option>
128 <option value="" selected="true">No</option>
129 </param>
130 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file"
131 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
132 </param>
133 </inputs>
134 <outputs>
135 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat"
136 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})">
137 <filter>aligned_fastx['aligned_fastx_selector']</filter>
138 </data>
139 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat"
140 label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})">
141 <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter>
142 </data>
143 <data format="sam" name="output_sam" from_work_dir="aligned.sam"
144 label="Alignments on ${on_string} (SAM)">
145 <filter>aligned_sam['aligned_sam_selector']</filter>
146 </data>
147 <data format="tabular" name="output_blast" from_work_dir="aligned.blast"
148 label="Alignments on ${on_string} (BLAST)">
149 <filter>aligned_blast</filter>
150 <change_format>
151 <when input="aligned_blast" value="--blast 0" format="txt" />
152 </change_format>
153 </data>
154 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log">
155 <filter>log</filter>
156 </data>
157 </outputs>
158 <tests>
159 <test>
160 <param name="input_reads" value="read_small.fastq" />
161 <param name="sequencing_type_selector" value="not_paired" />
162 <param name="strand_search" value="" />
163 <param name="databases_selector" value="history" />
164 <param name="database_name" value="ref_small.fasta" />
165 <param name="other" value="True" />
166 <param name="log" value="" />
167 <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" />
168 <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
169 <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" />
170 </test>
171 <test>
172 <param name="input_reads" value="read_small.fasta" />
173 <param name="sequencing_type_selector" value="not_paired" />
174 <param name="strand_search" value="" />
175 <param name="databases_selector" value="history" />
176 <param name="database_name" value="ref_small.fasta" />
177 <param name="other" value="True" />
178 <param name="log" value="" />
179 <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" />
180 <output name="output_other" file="sortmerna_wrapper_other2.fasta" />
181 <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" />
182 </test>
183 </tests>
184 <help>
185 <![CDATA[
186 **What it does**
187
188 SortMeRNA_ is software designed to rapidly filter ribosomal RNA fragments
189 from metatranscriptomic data produced by next-generation sequencers.
190 It is capable of handling large RNA databases and sorting out all fragments
191 matching to the database with high accuracy and specificity.
192
193 .. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/
194
195
196 **Input**
197
198 The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against.
199 If the user has two forward-reverse paired-sequencing reads files, they may use
200 the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order.
201
202 If the sequencing type for the reads is paired-ended, the user has two options under
203 "Sequencing type" to filter the reads and preserve their order in the file.
204 For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_.
205
206 .. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf
207
208
209 **Output**
210
211 The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistic file for the rRNA content of reads, as well as rRNA subunit distribution can be generated.
212
213
214 **rRNA databases**
215
216 SortMeRNA is distributed with 8 representative rRNA databases, which were
217 all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S
218 (version 11.0) databases using the tool UCLUST.
219
220 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
221 | Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) |
222 +==========================+======+=============+===================+========================+===================+
223 | SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 |
224 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
225 | SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 |
226 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
227 | SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 |
228 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
229 | SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 |
230 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
231 | SILVA 23S archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 |
232 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
233 | SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 |
234 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
235 | Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 |
236 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
237 | Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 |
238 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
239
240 id %: members of the cluster must have at least 'id %' identity with the representative sequence
241
242 average id %: average identity of a cluster member to the representative sequence
243
244 The user may also choose to use their own rRNA databases.
245
246 .. class:: warningmark
247
248 Note that your personal databases are indexed each time, and that
249 this may take some time depending on the size of the given database.
250 ]]>
251 </help>
252
253 <citations>
254 <citation type="doi">10.1093/bioinformatics/bts611</citation>
255 <citation type="doi">10.1093/nar/gks1219</citation>
256 <citation type="doi">10.1093/nar/gks1005</citation>
257 <citation type="doi">10.1093/bioinformatics/btq461</citation>
258 <citation type="doi">10.1038/nbt.2198</citation>
259 </citations>
260 </tool>