Mercurial > repos > rnateam > sortmerna
comparison sortmerna.xml @ 3:42509ccf8f59 draft
Uploaded
| author | iuc |
|---|---|
| date | Tue, 04 Aug 2015 15:14:58 -0400 |
| parents | |
| children | e6727cef3083 |
comparison
equal
deleted
inserted
replaced
| 2:6f23678fc6e9 | 3:42509ccf8f59 |
|---|---|
| 1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.0.0"> | |
| 2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> | |
| 3 <requirements> | |
| 4 <requirement type='package' version="2.0">sortmerna</requirement> | |
| 5 </requirements> | |
| 6 <!-- Output patterns, matched on both stdout and stderr, that mark the job as fatally failed. --> <stdio> | |
| 7 <regex match="This program builds a Burst trie on an input rRNA database" | |
| 8 source="both" | |
| 9 level="fatal" | |
| 10 description="Buildtrie program failed to execute." /> | |
| 11 <regex match="The database name" | |
| 12 source="both" | |
| 13 level="fatal" | |
| 14 description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." /> <!-- NOTE(review): no input parameter in this file is named "databases" (the conditional is "databases_type"); confirm whether this placeholder is ever expanded. --> | |
| 15 </stdio> | |
| 16 <!-- Reports the tool version by keeping only the 'SortMeRNA version' line of the combined stdout/stderr of "sortmerna" called with the version flag. --> <version_command> | |
| 17 <![CDATA[ | |
| 18 sortmerna --version 2>&1|grep 'SortMeRNA version' | |
| 19 ]]> | |
| 20 </version_command> | |
| 21 <!-- Command template. $ref is built as a colon-separated list of "fasta,index" pairs. History databases are indexed with indexdb_rna first; cached databases take both paths from the data table. --> <command> | |
| 22 <![CDATA[ | |
| 23 #set $ref = '' | |
| 24 #set $sep='' | |
| 25 #if str( $databases_type.databases_selector ) == 'history': | |
| 26 #for $db in $databases_type.database_name | |
| 27 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] | |
| 28 #set $sep = ':' | |
| 29 #end for | |
| 30 indexdb_rna --ref $ref && | |
| 31 #else: | |
| 32 ## databases path is not directly accessible, must match by hand with LOC file contents | |
| 33 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) | |
| 34 #for $db in $databases_type.input_databases.value | |
| 35 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] | |
| 36 #set $sep = ':' | |
| 37 #end for | |
| 38 #end if | |
| 39 ## The '&&' is emitted only after indexdb_rna (history branch); the cached branch runs no indexing step, so it must not start the command line with a dangling '&&'. | |
| 40 sortmerna --ref $ref --reads $input_reads --aligned aligned | |
| 41 #if str( $sequencing_type.sequencing_type_selector ) == 'paired' | |
| 42 $sequencing_type.paired_type | |
| 43 #end if | |
| 44 $strand_search | |
| 45 $aligned_fastx.aligned_fastx_selector | |
| 46 #if str( $aligned_fastx.aligned_fastx_selector ) == '--fastx' | |
| 47 #if $aligned_fastx.other | |
| 48 --other other_file | |
| 49 #end if | |
| 50 #end if | |
| 51 $aligned_sam.aligned_sam_selector | |
| 52 #if str( $aligned_sam.aligned_sam_selector ) == '--sam' | |
| 53 $aligned_sam.sq | |
| 54 #end if | |
| 55 $aligned_blast | |
| 56 $log | |
| 57 -a \${GALAXY_SLOTS:-1} | |
| 58 ]]> | |
| 59 </command> | |
| 60 <!-- User-facing parameters. Option values that start with a dash are inserted verbatim into the command line by the command template. --> <inputs> | |
| 61 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/> | |
| 62 <!-- Paired-read handling. Only the "paired" case has a <when> block; "not_paired" adds no extra parameters. --> <conditional name="sequencing_type"> | |
| 63 <param name="sequencing_type_selector" type="select" label="Sequencing type"> | |
| 64 <option value="not_paired">Reads are not paired</option> | |
| 65 <option value="paired">Reads are paired</option> | |
| 66 </param> | |
| 67 <when value="paired"> | |
| 68 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not"> | |
| 69 <option value="">leave the reads split between aligned and rejected files</option> | |
| 70 <option value="--paired-in">output both reads to aligned file (--paired-in)</option> | |
| 71 <option value="--paired-out">output both reads to rejected file (--paired-out)</option> | |
| 72 </param> | |
| 73 </when> | |
| 74 </conditional> | |
| 75 | |
| 76 <param name="strand_search" type="select" label="Which strands to search" display="radio"> | |
| 77 <option value="">Search both strands</option> | |
| 78 <option value="-F">Search only the forward strand (-F)</option> | |
| 79 <option value="-R">Search only the reverse-complementary strand (-R)</option> | |
| 80 </param> | |
| 81 | |
| 82 <!-- Reference databases: entries of the rRNA_databases data table ("cached"), or FASTA datasets from the history that the command template indexes on every run ("history"). --> <conditional name="databases_type"> | |
| 83 <param name="databases_selector" type="select" label="Databases to query" | |
| 84 help="Public rRNA databases provided with SortMeRNA have been indexed. | |
| 85 On the contrary, personal databases must be indexed each time SortMeRNA is launched. | |
| 86 Please be patient, this may take some time depending on the size of the given database."> | |
| 87 <option value="cached" selected="true">Public ribosomal databases</option> | |
| 88 <option value="history">Databases from your history</option> | |
| 89 </param> | |
| 90 <when value="cached"> | |
| 91 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> | |
| 92 <options from_data_table="rRNA_databases" /> | |
| 93 <validator type="no_options" message="Select at least one database"/> | |
| 94 </param> | |
| 95 </when> | |
| 96 <when value="history"> | |
| 97 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases" | |
| 98 help="Your databases will be indexed first, which may take up to several minutes."/> | |
| 99 </when> | |
| 100 </conditional> | |
| 101 | |
| 102 <!-- Outputs --> | |
| 103 <!-- FASTA/FASTQ output switch; the "other" (rejected reads) flag is only offered when this output is enabled. --> <conditional name="aligned_fastx"> | |
| 104 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format"> | |
| 105 <option value="--fastx">Yes (--fastx)</option> | |
| 106 <option value="">No</option> | |
| 107 </param> | |
| 108 <when value="--fastx"> | |
| 109 <param name="other" type="boolean" label="Include rejected reads file" help="(--other)" /> | |
| 110 </when> | |
| 111 <when value="" /> | |
| 112 </conditional> | |
| 113 <!-- SAM output switch; the SQ-tag flag is only offered when SAM output is enabled. --> <conditional name="aligned_sam"> | |
| 114 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format"> | |
| 115 <option value="--sam">Yes (--sam)</option> | |
| 116 <option value="">No</option> | |
| 117 </param> | |
| 118 <when value="--sam"> | |
| 119 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" /> | |
| 120 </when> | |
| 121 <when value="" /> | |
| 122 </conditional> | |
| 123 <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format"> | |
| 124 <option value="--blast 0">pairwise (--blast 0)</option> | |
| 125 <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option> | |
| 126 <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option> | |
| 127 <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option> | |
| 128 <option value="" selected="true">No</option> | |
| 129 </param> | |
| 130 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file" | |
| 131 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> | |
| 132 </param> | |
| 133 </inputs> | |
| 134 <!-- Result datasets collected from the working directory. Each filter is a Python expression over the submitted parameters; parameters nested in a conditional are reached with cond['param'] lookups so the expression evaluates instead of raising and leaving the output unfiltered. --> <outputs> | |
| 135 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat" | |
| 136 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})"> | |
| 137 <filter>aligned_fastx['aligned_fastx_selector'] == '--fastx'</filter> | |
| 138 </data> | |
| 139 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" | |
| 140 label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})"> | |
| 141 <filter>aligned_fastx['aligned_fastx_selector'] == '--fastx' and aligned_fastx['other']</filter> | |
| 142 </data> | |
| 143 <data format="sam" name="output_sam" from_work_dir="aligned.sam" | |
| 144 label="Alignments on ${on_string} (SAM)"> | |
| 145 <filter>aligned_sam['aligned_sam_selector'] == '--sam'</filter> | |
| 146 </data> | |
| 147 <data format="tabular" name="output_blast" from_work_dir="aligned.blast" | |
| 148 label="Alignments on ${on_string} (BLAST)"> | |
| 149 <filter>aligned_blast</filter> | |
| 150 <change_format> | |
| 151 <when input="aligned_blast" value="--blast 0" format="txt" /> | |
| 152 </change_format> | |
| 153 </data> | |
| 154 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log"> | |
| 155 <filter>log</filter> | |
| 156 </data> | |
| 157 </outputs> | |
| 158 <!-- Functional tests. Both use a history database (ref_small.fasta) and leave aligned_fastx_selector and aligned_sam_selector unset, so the expected fastx and SAM outputs depend on those selectors' defaults. --> <tests> | |
| 159 <!-- FASTQ reads. --> <test> | |
| 160 <param name="input_reads" value="read_small.fastq" /> | |
| 161 <param name="sequencing_type_selector" value="not_paired" /> | |
| 162 <param name="strand_search" value="" /> | |
| 163 <param name="databases_selector" value="history" /> | |
| 164 <param name="database_name" value="ref_small.fasta" /> | |
| 165 <param name="other" value="True" /> | |
| 166 <param name="log" value="" /> | |
| 167 <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" /> | |
| 168 <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> | |
| 169 <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" /> | |
| 170 </test> | |
| 171 <!-- FASTA reads, otherwise the same parameters as the first test. --> <test> | |
| 172 <param name="input_reads" value="read_small.fasta" /> | |
| 173 <param name="sequencing_type_selector" value="not_paired" /> | |
| 174 <param name="strand_search" value="" /> | |
| 175 <param name="databases_selector" value="history" /> | |
| 176 <param name="database_name" value="ref_small.fasta" /> | |
| 177 <param name="other" value="True" /> | |
| 178 <param name="log" value="" /> | |
| 179 <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" /> | |
| 180 <output name="output_other" file="sortmerna_wrapper_other2.fasta" /> | |
| 181 <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" /> | |
| 182 </test> | |
| 183 </tests> | |
| 184 <help> | |
| 185 <![CDATA[ | |
| 186 **What it does** | |
| 187 | |
| 188 SortMeRNA_ is a software tool designed to rapidly filter ribosomal RNA fragments | |
| 189 from metatranscriptomic data produced by next-generation sequencers. | |
| 190 It is capable of handling large RNA databases and sorting out all fragments | |
| 191 matching to the database with high accuracy and specificity. | |
| 192 | |
| 193 .. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/ | |
| 194 | |
| 195 | |
| 196 **Input** | |
| 197 | |
| 198 The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against. | |
| 199 If the user has two forward-reverse paired-sequencing reads files, they may use | |
| 200 the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order. | |
| 201 | |
| 202 If the sequencing type for the reads is paired-ended, the user has two options under | |
| 203 "Sequencing type" to filter the reads and preserve their order in the file. | |
| 204 For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_. | |
| 205 | |
| 206 .. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf | |
| 207 | |
| 208 | |
| 209 **Output** | |
| 210 | |
| 211 The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistic file for the rRNA content of reads, as well as rRNA subunit distribution can be generated. | |
| 212 | |
| 213 | |
| 214 **rRNA databases** | |
| 215 | |
| 216 SortMeRNA is distributed with 8 representative rRNA databases, which were | |
| 217 all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S | |
| 218 (version 11.0) databases using the tool UCLUST. | |
| 219 | |
| 220 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 221 | Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) | | |
| 222 +==========================+======+=============+===================+========================+===================+ | |
| 223 | SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 | | |
| 224 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 225 | SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 | | |
| 226 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 227 | SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 | | |
| 228 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 229 | SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 | | |
| 230 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 231 | SILVA 23s archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 | | |
| 232 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 233 | SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 | | |
| 234 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 235 | Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 | | |
| 236 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 237 | Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 | | |
| 238 +--------------------------+------+-------------+-------------------+------------------------+-------------------+ | |
| 239 | |
| 240 id %: members of the cluster must have at least 'id %' identity with the representative sequence | |
| 241 | |
| 242 average id %: average identity of a cluster member to the representative sequence | |
| 243 | |
| 244 The user may also choose to use their own rRNA databases. | |
| 245 | |
| 246 .. class:: warningmark | |
| 247 | |
| 248 Note that your personal databases are indexed each time, and that | |
| 249 this may take some time depending on the size of the given database. | |
| 250 ]]> | |
| 251 </help> | |
| 252 | |
| 253 <!-- Publications, identified by DOI, listed as citations for this tool. --> <citations> | |
| 254 <citation type="doi">10.1093/bioinformatics/bts611</citation> | |
| 255 <citation type="doi">10.1093/nar/gks1219</citation> | |
| 256 <citation type="doi">10.1093/nar/gks1005</citation> | |
| 257 <citation type="doi">10.1093/bioinformatics/btq461</citation> | |
| 258 <citation type="doi">10.1038/nbt.2198</citation> | |
| 259 </citations> | |
| 260 </tool> |
