Mercurial > repos > bebatut > sortmerna
comparison sortmerna.xml @ 15:baab049d3aff draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit e20e66ff81239452b3d75dec16e9e0cc8eb46266-dirty
author | bebatut |
---|---|
date | Wed, 10 Feb 2016 04:02:18 -0500 |
parents | 4016c1db6886 |
children | 1a4662c2d6db |
comparison
equal
deleted
inserted
replaced
14:011ec5258e71 | 15:baab049d3aff |
---|---|
1 <tool id="sortmerna" name="SortMeRNA" version="0.1.0"> | 1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.1.0"> |
2 <description>to filter ribosomal RNAs in metatranscriptomic data</description> | 2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> |
3 | |
4 <requirements> | 3 <requirements> |
5 <requirement type="package" version="2.0">sortmerna</requirement> | 4 <requirement type="package" version="2.0">sortmerna</requirement> |
6 </requirements> | 5 </requirements> |
7 | |
8 <stdio> | 6 <stdio> |
9 <exit_code range="1:" /> | 7 <regex match="This program builds a Burst trie on an input rRNA database" |
8 source="both" | |
9 level="fatal" | |
10 description="Buildtrie program failed to execute." /> | |
11 <regex match="The database name" | |
12 source="both" | |
13 level="fatal" | |
14 description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." /> | |
15 <regex match="ERROR" | |
16 source="both" | |
17 level="fatal" | |
18 description="ERROR" /> | |
10 </stdio> | 19 </stdio> |
11 | |
12 <version_command> | 20 <version_command> |
13 <![CDATA[ | 21 <![CDATA[ |
14 \${SORTMERNADIR}/sortmerna --version 2>&1|grep 'SortMeRNA version' | 22 sortmerna --version 2>&1|grep 'SortMeRNA version' |
15 ]]> | 23 ]]> |
16 </version_command> | 24 </version_command> |
17 | |
18 <command> | 25 <command> |
19 <![CDATA[ | 26 <![CDATA[ |
20 #set $ref = '' | 27 #set $ref = '' |
21 #set $sep='' | 28 #set $sep='' |
22 | 29 #if str( $databases_type.databases_selector ) == 'history' |
23 #if str( $databases.databases_selector ) == 'history' | 30 #for $db in $databases_type.database_name |
24 #for $db in $databases.databases_name | |
25 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] | 31 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] |
26 #set $sep = ':' | 32 #set $sep = ':' |
27 #end for | 33 #end for |
28 #else | 34 #else if str( $databases_type.databases_selector ) == 'cached_to_index' |
29 ## databases path is not directly accessible, must match by hand with LOC file contents | 35 ## databases path is not directly accessible, must match by hand with LOC file contents |
30 #set $data_table = dict([(_[0], _[2]) for _ in $databases.databases_input.input.options.tool_data_table.data]) | 36 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) |
31 #for $db in $databases.databases_input.value | 37 #for $db in $databases_type.input_databases.value |
38 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] + '-reindexed' | |
39 #set $sep = ':' | |
40 #end for | |
41 #else: | |
42 ## databases path is not directly accessible, must match by hand with LOC file contents | |
43 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) | |
44 #for $db in $databases_type.input_databases.value | |
32 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] | 45 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] |
33 #set $sep = ':' | 46 #set $sep = ':' |
34 #end for | 47 #end for |
35 #end if | 48 #end if |
36 | 49 |
37 \${SORTMERNADIR}/indexdb_rna --ref $ref -L $seed_length --max_pos $max_pos | 50 #if str( $databases_type.databases_selector ) != 'cached': |
38 | 51 indexdb_rna |
39 && | 52 --ref $ref |
40 | 53 -L $databases_type.seed_length |
41 \${SORTMERNADIR}/sortmerna | 54 --max_pos $databases_type.max_pos |
42 --ref $ref | 55 && |
43 --reads $input_sequence_file | 56 #end if |
44 --aligned aligned | 57 |
45 | 58 sortmerna |
46 $fastx.fastx_test | 59 --ref $ref |
47 #if $fastx.fastx_test == '--fastx' | 60 --reads $input_reads |
48 #if $fastx.fastx_rejected | 61 --aligned aligned |
49 --other other_file | 62 |
50 #end if | 63 #if str( $sequencing_type.sequencing_type_selector ) == 'paired' |
51 #end if | 64 $sequencing_type.paired_type |
52 | 65 #end if |
53 $sam.sam_test | 66 |
54 #if $sam.sam_test == '--sam' | 67 $strand_search |
55 $sam.sam_sq_tag | 68 $aligned_fastx.aligned_fastx_selector |
56 #end if | 69 #if $aligned_fastx.aligned_fastx_selector == '--fastx' |
57 | 70 #if $aligned_fastx.other |
58 $blast_format | 71 --other other_file |
59 | 72 #end if |
60 $log | 73 #end if |
74 $aligned_sam.aligned_sam_selector | |
75 #if $aligned_sam.aligned_sam_selector == '--sam' | |
76 $aligned_sam.sq | |
77 #end if | |
78 $aligned_blast | |
79 | |
80 $log | |
61 | 81 |
62 #if $report.report_type == 'best' | 82 #if $report.report_type == 'best' |
63 #if $report.report_best.report_best_type == '0' | 83 #if $report.report_best.report_best_type == '1' |
64 --best 0 | |
65 #else if $report.report_best.report_best_type == '1' | |
66 --best 1 | 84 --best 1 |
67 --min_lis $report.report_best.report_best_min_lis | 85 --min_lis $report.report_best.report_best_min_lis |
68 #else | 86 #else |
69 --best $report.report_best.report_best_value | 87 --best $report.report_best.report_best_value |
70 --min_list $report.report_best.report_best_min_lis | 88 --min_lis $report.report_best.report_best_min_lis |
71 #end if | 89 #end if |
72 #else | 90 #else |
73 #if $report.report_num_alignments.report_num_alignments_type == '0' | 91 #if $report.report_num_alignments.report_num_alignments_type == 'other_value' |
74 --num_alignments 0 | 92 --num_alignments $report.report_num_alignments.report_num_alignments_value |
75 #else if $report.report_num_alignments.report_num_alignments_type == '1' | |
76 --num_alignments 1 | |
77 #else | 93 #else |
78 --num_alignments $report.report_num_alignments.report_num_alignments_value | 94 --num_alignments $report.report_num_alignments.report_num_alignments_type |
79 #end if | 95 #end if |
80 #end if | 96 #end if |
81 | 97 |
82 -e $e_value | 98 -e $e_value |
83 --match $match | 99 --match $match |
84 --mismatch $mismatch | 100 --mismatch $mismatch |
85 --gap_open $gap_open | 101 --gap_open $gap_open |
86 --gap_ext $gap_ext | 102 --gap_ext $gap_ext |
87 -N $ambiguous_letter | 103 -N $ambiguous_letter |
88 | 104 -a \${GALAXY_SLOTS:-1} |
89 #if $strand == 'forward' | |
90 -F | |
91 #end if | |
92 #if $strand == 'reverse' | |
93 -R | |
94 #end if | |
95 ]]> | 105 ]]> |
96 </command> | 106 </command> |
97 | |
98 <inputs> | 107 <inputs> |
99 <param name="input_sequence_file" type="data" format="fastq,fasta" label="Input sequence file" help=""/> | 108 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/> |
100 | 109 <conditional name="sequencing_type"> |
101 <conditional name="databases"> | 110 <param name="sequencing_type_selector" type="select" label="Sequencing type"> |
102 <param name="databases_selector" type="select" label="Databases to query" help=""> | 111 <option value="not_paired">Reads are not paired</option> |
103 <option value="cached" selected="true">Public ribosomal databases</option> | 112 <option value="paired">Reads are paired</option> |
113 </param> | |
114 <when value="not_paired" /> | |
115 <when value="paired"> | |
116 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not"> | |
117 <option value="">leave the reads split between aligned and rejected files</option> | |
118 <option value="--paired-in">output both reads to aligned file (--paired-in)</option> | |
119 <option value="--paired-out">output both reads to rejected file (--paired-out)</option> | |
120 </param> | |
121 </when> | |
122 </conditional> | |
123 | |
124 <param name="strand_search" type="select" label="Which strands to search"> | |
125 <option value="">Search both strands</option> | |
126 <option value="-F">Search only the forward strand (-F)</option> | |
127 <option value="-R">Search only the reverse-complementary strand (-R)</option> | |
128 </param> | |
129 | |
130 <conditional name="databases_type"> | |
131 <param name="databases_selector" type="select" label="Databases to query" | |
132 help="Public rRNA databases provided with SortMeRNA have been indexed. | |
133 On the contrary, personal databases must be indexed each time SortMeRNA is launched. | |
134 Please be patient, this may take some time depending on the size of the given database."> | |
135 <option value="cached" selected="true">Public pre-indexed ribosomal databases</option> | |
136 <option value="cached_to_index">Public ribosomal databases to index with non default parameters</option> | |
104 <option value="history">Databases from your history</option> | 137 <option value="history">Databases from your history</option> |
105 </param> | 138 </param> |
106 <when value="cached"> | 139 <when value="cached"> |
107 <param name="databases_input" label="rRNA databases" type="select" display="checkboxes" multiple="true"> | 140 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> |
108 <options from_data_table="sortmerna_rRNA_databases" /> | 141 <options from_data_table="rRNA_databases" /> |
109 <validator type="no_options" message="Select at least one database"/> | 142 <validator type="no_options" message="Select at least one database"/> |
110 </param> | 143 </param> |
111 </when> | 144 </when> |
145 <when value="cached_to_index"> | |
146 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> | |
147 <options from_data_table="rRNA_databases" /> | |
148 <validator type="no_options" message="Select at least one database"/> | |
149 </param> | |
150 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/> | |
151 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/> | |
152 </when> | |
112 <when value="history"> | 153 <when value="history"> |
113 <param name="databases_name" type="data" format="fasta" multiple="true" label="rRNA databases" | 154 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases" |
114 help=""/> | 155 help="Your databases will be indexed first, which may take up to several minutes."/> |
115 </when> | 156 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/> |
116 </conditional> | 157 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/> |
117 | 158 </when> |
118 <conditional name="fastx"> | 159 </conditional> |
119 <param name="fastx_test" type='select' label="Output into Fasta/FastQ file?" help=""> | 160 |
120 <option value="--fastx">Yes</option> | 161 <!-- Outputs --> |
162 <conditional name="aligned_fastx"> | |
163 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format?"> | |
164 <option value="--fastx">Yes (--fastx)</option> | |
121 <option value="">No</option> | 165 <option value="">No</option> |
122 </param> | 166 </param> |
123 <when value="--fastx"> | 167 <when value="--fastx"> |
124 <param name='fastx_rejected' type='boolean' checked="true" label="Conserve rejected reads?" help=""/> | 168 <param name="other" type="boolean" label="Include rejected reads file?" help="(--other)" /> |
125 </when> | 169 </when> |
126 </conditional> | 170 <when value="" /> |
127 | 171 </conditional> |
128 <conditional name="sam"> | 172 <conditional name="aligned_sam"> |
129 <param name="sam_test" type='select' label="Output SAM alignments?" help=""> | 173 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format?"> |
130 <option value="--sam">Yes</option> | 174 <option value="--sam">Yes (--sam)</option> |
131 <option value="">No</option> | 175 <option value="">No</option> |
132 </param> | 176 </param> |
133 <when value="--sam"> | 177 <when value="--sam"> |
134 <param name='sam_sq_tag' type='boolean' checked="true" truevalue="--SQ" falsevalue="" label="Add SQ tags to SAM file?" help=""/> | 178 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" /> |
135 </when> | 179 </when> |
136 </conditional> | 180 <when value="" /> |
137 | 181 </conditional> |
138 <param name="blast_format" type="select" display="radio" label="Format for BLAST output" help=""> | 182 <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format"> |
139 <option value="--blast 0">Pairwise</option> | 183 <option value="--blast 0">pairwise (--blast 0)</option> |
140 <option value="--blast 1">Tabular (Blast -m 8 format)</option> | 184 <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option> |
141 <option value="--blast 2'">Tabular + column for CIGAR</option> | 185 <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option> |
142 <option value="--blast 3" selected="true">Tabular + columns for CIGAR and query coverage</option> | 186 <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option> |
143 <option value="">No Blast output</option> | 187 <option value="" selected="true">No</option> |
144 </param> | 188 </param> |
145 | 189 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file" |
146 <param name='log' type='boolean' checked="true" truevalue="--log" falsevalue="" label="Conserve overall statistic output into a log file?" help=""/> | 190 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> |
147 | 191 </param> |
148 <conditional name="report"> | 192 <conditional name="report"> |
149 <param name="report_type" type="select" display="radio" label="Parameters for filtering and read mapping" help=""> | 193 <param name="report_type" type="select" label="Parameters for filtering and read mapping" help=""> |
150 <option value="best" selected="true">Report best alignments per read reaching E-value</option> | 194 <option value="best" selected="true">Report best alignments per read reaching E-value</option> |
151 <option value="num_alignments">Report first alignements per read reaching E-value</option> | 195 <option value="num_alignments">Report first alignements per read reaching E-value</option> |
152 </param> | 196 </param> |
153 <when value="best"> | 197 <when value="best"> |
154 <conditional name="report_best"> | 198 <conditional name="report_best"> |
155 <param name="report_best_type" type="select" display="radio" label="Number of searched alignments" help="Only the best alignment is reported"> | 199 <param name="report_best_type" type="select" label="Number of searched alignments" help="Only the best alignment is reported (--best)"> |
156 <option value="0">All high-candidate reference sequences are searched for alignments (very slow)</option> | |
157 <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast). The high-candidate sequences are determined heuristically using a LIS of seed matches)</option> | 200 <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast). The high-candidate sequences are determined heuristically using a LIS of seed matches)</option> |
158 <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option> | 201 <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option> |
159 </param> | 202 </param> |
203 <when value="1"> | |
204 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/> | |
205 </when> | |
160 <when value="other_value"> | 206 <when value="other_value"> |
161 <param name="report_best_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/> | 207 <param name="report_best_value" type="integer" min="2" max="100" value="2" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/> |
162 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment."/> | 208 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/> |
163 </when> | |
164 <when value="1"> | |
165 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment."/> | |
166 </when> | 209 </when> |
167 </conditional> | 210 </conditional> |
168 </when> | 211 </when> |
169 <when value="num_alignments"> | 212 <when value="num_alignments"> |
170 <conditional name="report_num_alignments"> | 213 <conditional name="report_num_alignments"> |
171 <param name="report_num_alignments_type" type="select" display="radio" label="Number of output alignments" help=""> | 214 <param name="report_num_alignments_type" type="select" label="Number of output alignments" help="(--num_alignments)"> |
172 <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option> | 215 <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option> |
173 <option value="1" selected="true">The first alignment passing E-value threshold are reported (very fast, best choice if only filtering is needed)</option> | 216 <option value="1" selected="true">The first alignment passing E-value threshold are reported (very fast, best choice if only filtering is needed)</option> |
174 <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option> | 217 <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option> |
175 </param> | 218 </param> |
219 <when value="0" /> | |
220 <when value="1" /> | |
176 <when value="other_value"> | 221 <when value="other_value"> |
177 <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/> | 222 <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/> |
178 </when> | 223 </when> |
179 </conditional> | 224 </conditional> |
180 </when> | 225 </when> |
181 </conditional> | 226 </conditional> |
182 | 227 |
183 <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help=""/> | 228 <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help="(-e)"/> |
184 <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help=""/> | 229 <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help="(--match)"/> |
185 <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help=""/> | 230 <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help="(--mismatch)"/> |
186 <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help=""/> | 231 <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help="(--gap_open)"/> |
187 <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help=""/> | 232 <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help="(--gap_ext)"/> |
188 <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help=""/> | 233 <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help="(-N)"/> |
189 | |
190 <param name="strand" type="select" display="radio" label="Search on" help=""> | |
191 <option value="both" selected="true">Both strands</option> | |
192 <option value="forward" >Only forward strand</option> | |
193 <option value="reverse" >Only reverse-complementary strand</option> | |
194 </param> | |
195 | |
196 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help=""/> | |
197 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored"/> | |
198 </inputs> | 234 </inputs> |
199 | |
200 <outputs> | 235 <outputs> |
201 <data format_source="input_sequence_file" name="aligned_sequence_file" | 236 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat" |
202 metadata="input_sequence_file" from_work_dir="aligned.dat" | 237 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})"> |
203 label="Aligned sequences on ${on_string} (SortMeRNA)"> | 238 <filter>aligned_fastx['aligned_fastx_selector']</filter> |
204 <filter>((fastx['fastx_test']))</filter> | 239 </data> |
205 </data> | 240 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" |
206 | 241 label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})"> |
207 <data format_source="input_sequence_file" name="rejected_sequence_file" | 242 <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter> |
208 metadata="input_sequence_file" from_work_dir="other_file.dat" | 243 </data> |
209 label="Rejected sequences on ${on_string} (SortMeRNA)"> | 244 <data format="sam" name="output_sam" from_work_dir="aligned.sam" |
210 <filter>((fastx['fastx_test'] and fastx['fastx_rejected']))</filter> | 245 label="Alignments on ${on_string} (SAM)"> |
211 </data> | 246 <filter>aligned_sam['aligned_sam_selector']</filter> |
212 | 247 </data> |
213 <data format="sam" name="sam_alignment_file" metadata="input_sequence_file" | 248 <data format="tabular" name="output_blast" from_work_dir="aligned.blast" |
214 from_work_dir="aligned.sam" | 249 label="Alignments on ${on_string} (BLAST)"> |
215 label="SAM alignments on ${on_string} (SortMeRNA)"> | 250 <filter>aligned_blast</filter> |
216 <filter>((sam['sam_test']]))</filter> | |
217 </data> | |
218 | |
219 <data format="tabular" name="blast_output_file" | |
220 metadata="input_sequence_file" from_work_dir="aligned.blast" | |
221 label="Blast alignments on ${on_string} (SortMeRNA)"> | |
222 <filter>blast_format</filter> | |
223 <change_format> | 251 <change_format> |
224 <when input="blast_format" value="--blast 0" format="txt" /> | 252 <when input="aligned_blast" value="--blast 0" format="txt" /> |
225 </change_format> | 253 </change_format> |
226 </data> | 254 </data> |
227 | 255 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log"> |
228 <data format="txt" name="output_log" metadata="input_sequence_file" | |
229 from_work_dir="aligned.log" label="Log on ${on_string} (SortMeRNA)"> | |
230 <filter>log</filter> | 256 <filter>log</filter> |
231 </data> | 257 </data> |
232 </outputs> | 258 </outputs> |
233 | |
234 <tests> | 259 <tests> |
235 <test> | 260 <test> |
236 <param name="input_sequence_file" value="sortmerna_input_sequences.fastq" ftype="fastq"/> | 261 <param name="input_reads" value="read_small.fastq" /> |
262 <param name="sequencing_type_selector" value="not_paired" /> | |
263 <param name="strand_search" value="" /> | |
237 <param name="databases_selector" value="history" /> | 264 <param name="databases_selector" value="history" /> |
238 <param name="databases_name" value="sortmerna_db.fasta" ftype="fasta"/> | 265 <param name="database_name" value="ref_small.fasta" /> |
239 <param name="fastx_test" value="--fastx" /> | 266 <param name="other" value="True" /> |
240 <param name='fastx_rejected' value="True"/> | 267 <param name="log" value="" /> |
241 <param name="sam_test" value="" /> | 268 <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" /> |
242 <param name="blast_format" value="--blast 3" /> | 269 <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> |
243 <param name='log' value="" /> | 270 <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" /> |
244 <param name="report_type" value="best" /> | 271 </test> |
245 <param name="report_best_type" value="1" /> | 272 <test> |
246 <param name="report_best_min_lis" value="2" /> | 273 <param name="input_reads" value="read_small.fasta" /> |
247 <param name="e_value" value="1" /> | 274 <param name="sequencing_type_selector" value="not_paired" /> |
248 <param name="match" value="2" /> | 275 <param name="strand_search" value="" /> |
249 <param name="mismatch" value="-3" /> | 276 <param name="databases_selector" value="history" /> |
250 <param name="gap_open" value="5" /> | 277 <param name="database_name" value="ref_small.fasta" /> |
251 <param name="gap_ext" value="2" /> | 278 <param name="other" value="True" /> |
252 <param name="ambiguous_letter" value="-3" /> | 279 <param name="log" value="" /> |
253 <param name="strand" value="both" /> | 280 <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" /> |
254 <param name="seed_length" value="18" /> | 281 <output name="output_other" file="sortmerna_wrapper_other2.fasta" /> |
255 <param name="max_pos" value="10000" /> | 282 <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" /> |
256 | |
257 <output name="aligned_sequence_file" file="sortmerna_aligned_sequences.fastq" ftype="fastq"/> | |
258 <output name="rejected_sequence_file" file="sortmerna_rejected_sequences.fastq" ftype="fastq"/> | |
259 <output name="blast_output_file" file="sortmerna_blast_output.tabular" ftype="tabular"/> | |
260 </test> | 283 </test> |
261 </tests> | 284 </tests> |
262 | 285 <help> |
263 <help><![CDATA[ | 286 <![CDATA[ |
264 | |
265 **What it does** | 287 **What it does** |
266 | 288 |
267 SortMeRNA is a tool for RNA filtering based on local sequence alignment against | 289 SortMeRNA_ is a software designed to rapidly filter ribosomal RNA fragments |
268 rRNA database. For more information, check the `user manual <http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf>`_. | 290 from metatranscriptomic data produced by next-generation sequencers. |
269 | 291 It is capable of handling large RNA databases and sorting out all fragments |
270 ----- | 292 matching to the database with high accuracy and specificity. |
293 | |
294 .. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/ | |
295 | |
271 | 296 |
272 **Input** | 297 **Input** |
273 | 298 |
274 The input is a sequence file in fasta or fastq and databases to search against. | 299 The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against. |
275 These databases have to be indexed before the sequence alignment. | 300 If the user has two forward-reverse paired-sequencing reads files, they may use |
276 | 301 the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order. |
277 SortMeRNA is distributed with 8 rRNA databases constructed from SILVA SSU,LSU | 302 |
278 (version 111) and the RFAM 5/5.8S (version 11.0) databases: | 303 If the sequencing type for the reads is paired-end, the user has two options under |
279 | 304 "Sequencing type" to filter the reads and preserve their order in the file. |
280 - SILVA 16S bacteria | 305 For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_. |
281 - SILVA 16S archaea | 306 |
282 - SILVA 18S eukarya | 307 .. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf |
283 - SILVA 23S bacteria | 308 |
284 - SILVA 23s archaea | 309 |
285 - SILVA 28S eukarya | 310 **Output** |
286 - Rfam 5S archaea/bacteria | 311 |
287 - Rfam 5.8S eukarya | 312 The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistics file describing the rRNA content of the reads, as well as the rRNA subunit distribution, can be generated. |
288 | 313 |
289 These databases are available as public ribosomal databases. But local databases | 314 |
290 can also be used. | 315 **rRNA databases** |
291 | 316 |
292 ----- | 317 SortMeRNA is distributed with 8 representative rRNA databases, which were |
293 | 318 all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S |
294 **Parameters** | 319 (version 11.0) databases using the tool UCLUST. |
295 | 320 |
296 The database index can be modulated by: | 321 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
297 | 322 | Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) | |
298 - Seed length | 323 +==========================+======+=============+===================+========================+===================+ |
299 - Maximum number of positions to store for each k-mer for database indexing | 324 | SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 | |
300 | 325 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
301 For RNA sorting, the parameters are: | 326 | SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 | |
302 | 327 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
303 - Test to output files in fasta or fastq, in sam and/or in blast format | 328 | SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 | |
304 - Test for conservation of rejected sequences | 329 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
305 - Choice in blast format | 330 | SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 | |
306 - Test to add SQ tags in sam file | 331 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
307 - Filtering and read mapping parameters | 332 | SILVA 23s archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 | |
308 - Test for conservation of best alignment or first alignment | 333 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
309 - Number of searched, conserved alignments | 334 | SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 | |
310 - E-value threshold | 335 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
311 - SW score for a match, for a mismatch, for introducing a gap, for extending a gap, for ambiguous letters | 336 | Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 | |
312 - Strand to search | 337 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
313 | 338 | Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 | |
314 ----- | 339 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ |
315 | 340 |
316 **Outputs** | 341 id %: members of the cluster must have at least 'id %' identity with the representative sequence |
317 | 342 |
318 Given the chosen parameters, several outputs are possible | 343 average id %: average identity of a cluster member to the representative sequence |
319 | 344 |
320 - Sequence file in fasta or fastq with aligned sequences (or conserved) | 345 The user may also choose to use their own rRNA databases. |
321 - Sequence file in fasta or fastq with rejected sequences | 346 |
322 - File with sam alignments | 347 .. class:: warningmark |
323 - File with blast outputs | 348 |
324 | 349 Note that your personal databases are indexed each time. The public ribosomal |
325 | 350 databases are indexed when added, but they can be re-indexed with non-default indexing |
326 ]]></help> | 351 parameters. The indexing may take some time depending on the size of the given database. |
327 | 352 |
353 ]]> | |
354 </help> | |
355 | |
328 <citations> | 356 <citations> |
329 <citation type="doi">10.1093/bioinformatics/bts611</citation> | 357 <citation type="doi">10.1093/bioinformatics/bts611</citation> |
358 <citation type="doi">10.1093/nar/gks1219</citation> | |
359 <citation type="doi">10.1093/nar/gks1005</citation> | |
360 <citation type="doi">10.1093/bioinformatics/btq461</citation> | |
361 <citation type="doi">10.1038/nbt.2198</citation> | |
330 </citations> | 362 </citations> |
331 </tool> | 363 </tool> |