Mercurial > repos > bebatut > sortmerna
diff sortmerna.xml @ 15:baab049d3aff draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit e20e66ff81239452b3d75dec16e9e0cc8eb46266-dirty
author | bebatut |
---|---|
date | Wed, 10 Feb 2016 04:02:18 -0500 |
parents | 4016c1db6886 |
children | 1a4662c2d6db |
line wrap: on
line diff
--- a/sortmerna.xml Wed Nov 18 09:19:17 2015 -0500 +++ b/sortmerna.xml Wed Feb 10 04:02:18 2016 -0500 @@ -1,81 +1,97 @@ -<tool id="sortmerna" name="SortMeRNA" version="0.1.0"> - <description>to filter ribosomal RNAs in metatranscriptomic data</description> - +<tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.1.0"> + <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> <requirements> <requirement type="package" version="2.0">sortmerna</requirement> </requirements> - <stdio> - <exit_code range="1:" /> + <regex match="This program builds a Burst trie on an input rRNA database" + source="both" + level="fatal" + description="Buildtrie program failed to execute." /> + <regex match="The database name" + source="both" + level="fatal" + description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." /> + <regex match="ERROR" + source="both" + level="fatal" + description="ERROR" /> </stdio> - <version_command> <![CDATA[ -\${SORTMERNADIR}/sortmerna --version 2>&1|grep 'SortMeRNA version' +sortmerna --version 2>&1|grep 'SortMeRNA version' ]]> </version_command> - <command> <![CDATA[ #set $ref = '' #set $sep='' - - #if str( $databases.databases_selector ) == 'history' - #for $db in $databases.databases_name + #if str( $databases_type.databases_selector ) == 'history' + #for $db in $databases_type.database_name #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] #set $sep = ':' #end for - #else + #else if str( $databases_type.databases_selector ) == 'cached_to_index' ## databases path is not directly accessible, must match by hand with LOC file contents - #set $data_table = dict([(_[0], _[2]) for _ in $databases.databases_input.input.options.tool_data_table.data]) - #for $db in $databases.databases_input.value + #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) + #for $db in 
$databases_type.input_databases.value + #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] + '-reindexed' + #set $sep = ':' + #end for + #else: + ## databases path is not directly accessible, must match by hand with LOC file contents + #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) + #for $db in $databases_type.input_databases.value #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] #set $sep = ':' #end for #end if - \${SORTMERNADIR}/indexdb_rna --ref $ref -L $seed_length --max_pos $max_pos - - && - - \${SORTMERNADIR}/sortmerna - --ref $ref - --reads $input_sequence_file - --aligned aligned + #if str( $databases_type.databases_selector ) != 'cached': + indexdb_rna + --ref $ref + -L $databases_type.seed_length + --max_pos $databases_type.max_pos + && + #end if - $fastx.fastx_test - #if $fastx.fastx_test == '--fastx' - #if $fastx.fastx_rejected - --other other_file - #end if - #end if + sortmerna + --ref $ref + --reads $input_reads + --aligned aligned + + #if str( $sequencing_type.sequencing_type_selector ) == 'paired' + $sequencing_type.paired_type + #end if - $sam.sam_test - #if $sam.sam_test == '--sam' - $sam.sam_sq_tag - #end if + $strand_search + $aligned_fastx.aligned_fastx_selector + #if $aligned_fastx.aligned_fastx_selector == '--fastx' + #if $aligned_fastx.other + --other other_file + #end if + #end if + $aligned_sam.aligned_sam_selector + #if $aligned_sam.aligned_sam_selector == '--sam' + $aligned_sam.sq + #end if + $aligned_blast - $blast_format - - $log + $log #if $report.report_type == 'best' - #if $report.report_best.report_best_type == '0' - --best 0 - #else if $report.report_best.report_best_type == '1' + #if $report.report_best.report_best_type == '1' --best 1 --min_lis $report.report_best.report_best_min_lis #else --best $report.report_best.report_best_value - --min_list $report.report_best.report_best_min_lis + 
--min_lis $report.report_best.report_best_min_lis #end if #else - #if $report.report_num_alignments.report_num_alignments_type == '0' - --num_alignments 0 - #else if $report.report_num_alignments.report_num_alignments_type == '1' - --num_alignments 1 + #if $report.report_num_alignments.report_num_alignments_type == 'other_value' + --num_alignments $report.report_num_alignments.report_num_alignments_value #else - --num_alignments $report.report_num_alignments.report_num_alignments_value + --num_alignments $report.report_num_alignments.report_num_alignments_type #end if #end if @@ -85,94 +101,123 @@ --gap_open $gap_open --gap_ext $gap_ext -N $ambiguous_letter - - #if $strand == 'forward' - -F - #end if - #if $strand == 'reverse' - -R - #end if + -a \${GALAXY_SLOTS:-1} ]]> </command> - <inputs> - <param name="input_sequence_file" type="data" format="fastq,fasta" label="Input sequence file" help=""/> - - <conditional name="databases"> - <param name="databases_selector" type="select" label="Databases to query" help=""> - <option value="cached" selected="true">Public ribosomal databases</option> - <option value="history">Databases from your history</option> + <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/> + <conditional name="sequencing_type"> + <param name="sequencing_type_selector" type="select" label="Sequencing type"> + <option value="not_paired">Reads are not paired</option> + <option value="paired">Reads are paired</option> </param> - <when value="cached"> - <param name="databases_input" label="rRNA databases" type="select" display="checkboxes" multiple="true"> - <options from_data_table="sortmerna_rRNA_databases" /> - <validator type="no_options" message="Select at least one database"/> + <when value="not_paired" /> + <when value="paired"> + <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not"> + <option 
value="">leave the reads split between aligned and rejected files</option> + <option value="--paired-in">output both reads to aligned file (--paired-in)</option> + <option value="--paired-out">output both reads to rejected file (--paired-out)</option> </param> </when> - <when value="history"> - <param name="databases_name" type="data" format="fasta" multiple="true" label="rRNA databases" - help=""/> - </when> </conditional> - <conditional name="fastx"> - <param name="fastx_test" type='select' label="Output into Fasta/FastQ file?" help=""> - <option value="--fastx">Yes</option> + <param name="strand_search" type="select" label="Which strands to search"> + <option value="">Search both strands</option> + <option value="-F">Search only the forward strand (-F)</option> + <option value="-R">Search only the reverse-complementary strand (-R)</option> + </param> + + <conditional name="databases_type"> + <param name="databases_selector" type="select" label="Databases to query" + help="Public rRNA databases provided with SortMeRNA have been indexed. + On the contrary, personal databases must be indexed each time SortMeRNA is launched. 
+ Please be patient, this may take some time depending on the size of the given database."> + <option value="cached" selected="true">Public pre-indexed ribosomal databases</option> + <option value="cached_to_index">Public ribosomal databases to index with non default parameters</option> + <option value="history">Databases from your history</option> + </param> + <when value="cached"> + <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> + <options from_data_table="rRNA_databases" /> + <validator type="no_options" message="Select at least one database"/> + </param> + </when> + <when value="cached_to_index"> + <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> + <options from_data_table="rRNA_databases" /> + <validator type="no_options" message="Select at least one database"/> + </param> + <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/> + <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/> + </when> + <when value="history"> + <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases" + help="Your databases will be indexed first, which may take up to several minutes."/> + <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/> + <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/> + </when> + </conditional> + + <!-- Outputs --> + <conditional name="aligned_fastx"> + <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format?"> + <option 
value="--fastx">Yes (--fastx)</option> <option value="">No</option> </param> <when value="--fastx"> - <param name='fastx_rejected' type='boolean' checked="true" label="Conserve rejected reads?" help=""/> + <param name="other" type="boolean" label="Include rejected reads file?" help="(--other)" /> </when> + <when value="" /> </conditional> - - <conditional name="sam"> - <param name="sam_test" type='select' label="Output SAM alignments?" help=""> - <option value="--sam">Yes</option> + <conditional name="aligned_sam"> + <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format?"> + <option value="--sam">Yes (--sam)</option> <option value="">No</option> </param> <when value="--sam"> - <param name='sam_sq_tag' type='boolean' checked="true" truevalue="--SQ" falsevalue="" label="Add SQ tags to SAM file?" help=""/> + <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" /> </when> + <when value="" /> </conditional> - - <param name="blast_format" type="select" display="radio" label="Format for BLAST output" help=""> - <option value="--blast 0">Pairwise</option> - <option value="--blast 1">Tabular (Blast -m 8 format)</option> - <option value="--blast 2'">Tabular + column for CIGAR</option> - <option value="--blast 3" selected="true">Tabular + columns for CIGAR and query coverage</option> - <option value="">No Blast output</option> + <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format"> + <option value="--blast 0">pairwise (--blast 0)</option> + <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option> + <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option> + <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option> + <option value="" selected="true">No</option> </param> - - <param name='log' type='boolean' checked="true" truevalue="--log" falsevalue="" label="Conserve overall 
statistic output into a log file?" help=""/> - - <conditional name="report"> - <param name="report_type" type="select" display="radio" label="Parameters for filtering and read mapping" help=""> + <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file" + help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> + </param> + <conditional name="report"> + <param name="report_type" type="select" label="Parameters for filtering and read mapping" help=""> <option value="best" selected="true">Report best alignments per read reaching E-value</option> <option value="num_alignments">Report first alignements per read reaching E-value</option> </param> <when value="best"> <conditional name="report_best"> - <param name="report_best_type" type="select" display="radio" label="Number of searched alignments" help="Only the best alignment is reported"> - <option value="0">All high-candidate reference sequences are searched for alignments (very slow)</option> + <param name="report_best_type" type="select" label="Number of searched alignments" help="Only the best alignment is reported (--best)"> <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast). The high-candidate sequences are determined heuristically using a LIS of seed matches)</option> <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option> </param> + <when value="1"> + <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. 
(--min_lis)"/> + </when> <when value="other_value"> - <param name="report_best_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/> - <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment."/> - </when> - <when value="1"> - <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment."/> + <param name="report_best_value" type="integer" min="2" max="100" value="2" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decrease with high value"/> + <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignement needs to be searched" help="The alignements having the first INT longest LIS. LIS stands for Longest Increasing Subsequence, it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. 
(--min_lis)"/> </when> </conditional> </when> <when value="num_alignments"> <conditional name="report_num_alignments"> - <param name="report_num_alignments_type" type="select" display="radio" label="Number of output alignments" help=""> + <param name="report_num_alignments_type" type="select" label="Number of output alignments" help="(--num_alignments)"> <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option> <option value="1" selected="true">The first alignment passing E-value threshold are reported (very fast, best choice if only filtering is needed)</option> <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option> </param> + <when value="0" /> + <when value="1" /> <when value="other_value"> <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/> </when> @@ -180,152 +225,139 @@ </when> </conditional> - <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help=""/> - <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help=""/> - <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help=""/> - <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help=""/> - <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help=""/> - <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help=""/> - - <param name="strand" type="select" display="radio" label="Search on" help=""> - <option value="both" selected="true">Both strands</option> - <option value="forward" >Only forward strand</option> - <option value="reverse" >Only 
reverse-complementary strand</option> - </param> - - <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help=""/> - <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored"/> + <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help="(-e)"/> + <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help="(--match)"/> + <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help="(--mismatch)"/> + <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help="(--gap_open)"/> + <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help="(--gap_ext)"/> + <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help="(-N)"/> </inputs> - <outputs> - <data format_source="input_sequence_file" name="aligned_sequence_file" - metadata="input_sequence_file" from_work_dir="aligned.dat" - label="Aligned sequences on ${on_string} (SortMeRNA)"> - <filter>((fastx['fastx_test']))</filter> + <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat" + label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})"> + <filter>aligned_fastx['aligned_fastx_selector']</filter> </data> - - <data format_source="input_sequence_file" name="rejected_sequence_file" - metadata="input_sequence_file" from_work_dir="other_file.dat" - label="Rejected sequences on ${on_string} (SortMeRNA)"> - <filter>((fastx['fastx_test'] and fastx['fastx_rejected']))</filter> + <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" + label="Rejected reads on ${on_string} 
(${input_reads.datatype.file_ext})"> + <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter> </data> - - <data format="sam" name="sam_alignment_file" metadata="input_sequence_file" - from_work_dir="aligned.sam" - label="SAM alignments on ${on_string} (SortMeRNA)"> - <filter>((sam['sam_test']]))</filter> + <data format="sam" name="output_sam" from_work_dir="aligned.sam" + label="Alignments on ${on_string} (SAM)"> + <filter>aligned_sam['aligned_sam_selector']</filter> </data> - - <data format="tabular" name="blast_output_file" - metadata="input_sequence_file" from_work_dir="aligned.blast" - label="Blast alignments on ${on_string} (SortMeRNA)"> - <filter>blast_format</filter> + <data format="tabular" name="output_blast" from_work_dir="aligned.blast" + label="Alignments on ${on_string} (BLAST)"> + <filter>aligned_blast</filter> <change_format> - <when input="blast_format" value="--blast 0" format="txt" /> + <when input="aligned_blast" value="--blast 0" format="txt" /> </change_format> </data> - - <data format="txt" name="output_log" metadata="input_sequence_file" - from_work_dir="aligned.log" label="Log on ${on_string} (SortMeRNA)"> + <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log"> <filter>log</filter> </data> </outputs> - <tests> <test> - <param name="input_sequence_file" value="sortmerna_input_sequences.fastq" ftype="fastq"/> + <param name="input_reads" value="read_small.fastq" /> + <param name="sequencing_type_selector" value="not_paired" /> + <param name="strand_search" value="" /> <param name="databases_selector" value="history" /> - <param name="databases_name" value="sortmerna_db.fasta" ftype="fasta"/> - <param name="fastx_test" value="--fastx" /> - <param name='fastx_rejected' value="True"/> - <param name="sam_test" value="" /> - <param name="blast_format" value="--blast 3" /> - <param name='log' value="" /> - <param name="report_type" value="best" /> - <param 
name="report_best_type" value="1" /> - <param name="report_best_min_lis" value="2" /> - <param name="e_value" value="1" /> - <param name="match" value="2" /> - <param name="mismatch" value="-3" /> - <param name="gap_open" value="5" /> - <param name="gap_ext" value="2" /> - <param name="ambiguous_letter" value="-3" /> - <param name="strand" value="both" /> - <param name="seed_length" value="18" /> - <param name="max_pos" value="10000" /> - - <output name="aligned_sequence_file" file="sortmerna_aligned_sequences.fastq" ftype="fastq"/> - <output name="rejected_sequence_file" file="sortmerna_rejected_sequences.fastq" ftype="fastq"/> - <output name="blast_output_file" file="sortmerna_blast_output.tabular" ftype="tabular"/> + <param name="database_name" value="ref_small.fasta" /> + <param name="other" value="True" /> + <param name="log" value="" /> + <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" /> + <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> + <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" /> + </test> + <test> + <param name="input_reads" value="read_small.fasta" /> + <param name="sequencing_type_selector" value="not_paired" /> + <param name="strand_search" value="" /> + <param name="databases_selector" value="history" /> + <param name="database_name" value="ref_small.fasta" /> + <param name="other" value="True" /> + <param name="log" value="" /> + <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" /> + <output name="output_other" file="sortmerna_wrapper_other2.fasta" /> + <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" /> </test> </tests> - - <help><![CDATA[ - + <help> +<![CDATA[ **What it does** -SortMeRNA is a tool for RNA filtering based on local sequence alignment against -rRNA database. For more information, check the `user manual <http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf>`_. 
+SortMeRNA_ is software designed to rapidly filter ribosomal RNA fragments +from metatranscriptomic data produced by next-generation sequencers. +It is capable of handling large RNA databases and sorting out all fragments +matching to the database with high accuracy and specificity. ------ +.. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/ + **Input** -The input is a sequence file in fasta or fastq and databases to search against. -These databases have to be indexed before the sequence alignment. - -SortMeRNA is distributed with 8 rRNA databases constructed from SILVA SSU,LSU -(version 111) and the RFAM 5/5.8S (version 11.0) databases: - - - SILVA 16S bacteria - - SILVA 16S archaea - - SILVA 18S eukarya - - SILVA 23S bacteria - - SILVA 23s archaea - - SILVA 28S eukarya - - Rfam 5S archaea/bacteria - - Rfam 5.8S eukarya - -These databases are available as public ribosomal databases. But local databases -can also be used. - ------ - -**Parameters** - -The database index can be modulated by: +The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against. +If the user has two forward-reverse paired-sequencing reads files, they may use +the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order. - - Seed length - - Maximum number of positions to store for each k-mer for database indexing +If the sequencing type for the reads is paired-end, the user has two options under +"Sequencing type" to filter the reads and preserve their order in the file. +For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_. 
-For RNA sorting, the parameters are: - - - Test to output files in fasta or fastq, in sam and/or in blast format - - Test for conservation of rejected sequences - - Choice in blast format - - Test to add SQ tags in sam file - - Filtering and read mapping parameters - - Test for conservation of best alignment or first alignment - - Number of searched, conserved alignments - - E-value threshold - - SW score for a match, for a mismatch, for introducing a gap, for extending a gap, for ambigous letters - - Strand to search +.. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf + ------ - -**Outputs** +**Output** -Given the choosen parameters, several outputs are possible - - - Sequence file in fasta or fastq with aligned sequences (or conserved) - - Sequence file in fasta or fastq with rejected sequences - - File with sam alignments - - File with blast outputs +The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistics file for the rRNA content of reads, as well as rRNA subunit distribution, can be generated. - ]]></help> - +**rRNA databases** + +SortMeRNA is distributed with 8 representative rRNA databases, which were +all constructed from the SILVA SSU, LSU (version 111) and the RFAM 5/5.8S +(version 11.0) databases using the tool UCLUST. 
+ ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) | ++==========================+======+=============+===================+========================+===================+ +| SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 23s archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ +| Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 | ++--------------------------+------+-------------+-------------------+------------------------+-------------------+ + +id %: members of the cluster must have identity at least 'id %' identity with the representative sequence + +average id %: average identity of a cluster member to the 
representative sequence + +The user may also choose to use their own rRNA databases. + +.. class:: warningmark + +Note that your personal databases are indexed each time. The public ribosomal +databases are indexed when added, but they can be re-indexed with non-default indexing +parameters. The indexing may take some time depending on the size of the given database. + +]]> + </help> + <citations> <citation type="doi">10.1093/bioinformatics/bts611</citation> + <citation type="doi">10.1093/nar/gks1219</citation> + <citation type="doi">10.1093/nar/gks1005</citation> + <citation type="doi">10.1093/bioinformatics/btq461</citation> + <citation type="doi">10.1038/nbt.2198</citation> </citations> -</tool> \ No newline at end of file +</tool>