diff blast_xml_to_tabular.xml @ 0:efe0c7b8fb78 draft

planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/blast_xml_to_tabular commit 006cbba6513492f5a06b573c676400a2d464520b-dirty
author dfornika
date Mon, 09 Sep 2019 17:17:09 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast_xml_to_tabular.xml	Mon Sep 09 17:17:09 2019 -0400
@@ -0,0 +1,102 @@
+<tool id="blast_xml_to_tabular" name="BLAST XML to tabular" version="1.1.0">
+    <description>Convert BLAST XML output to tabular</description>
+    <command detect_errors="exit_code">
+        <![CDATA[  
+          '$__tool_directory__/blast_xml_to_tabular.py'
+            '${blastxml_file}'
+            '${tabular_file}'
+            '${out_format}'
+        ]]>
+    </command>
+    <inputs>
+        <param name="blastxml_file" type="data" format="blastxml" label="BLAST results as XML"/> 
+        <param name="out_format" type="select" label="Output format">
+            <option value="std" selected="True">Tabular (standard 12 columns)</option>
+            <option value="ext">Tabular (extended 24 columns)</option>
+            <option value="ext+">Tabular (extended 26 columns)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="tabular_file" format="tabular" label="BLAST results: data $blastxml_file.hid as tabular" />
+    </outputs>
+    <help>
+
+.. class:: infomark
+
+**What it does**
+
+NCBI BLAST+ (and the older NCBI 'legacy' BLAST) can output in a range of
+formats including tabular and a more detailed XML format. A complex workflow
+may need both the XML and the tabular output - but running BLAST twice is
+slow and wasteful.
+
+This tool takes the BLAST XML output and by default converts it into the
+standard 12 column tabular equivalent:
+
+====== ========= ============================================
+Column NCBI name Description
+------ --------- --------------------------------------------
+     1 qseqid    Query Seq-id (ID of your sequence)
+     2 sseqid    Subject Seq-id (ID of the database hit)
+     3 pident    Percentage of identical matches
+     4 length    Alignment length
+     5 mismatch  Number of mismatches
+     6 gapopen   Number of gap openings
+     7 qstart    Start of alignment in query
+     8 qend      End of alignment in query
+     9 sstart    Start of alignment in subject (database hit)
+    10 send      End of alignment in subject (database hit)
+    11 evalue    Expectation value (E-value)
+    12 bitscore  Bit score
+====== ========= ============================================
+
+The BLAST+ tools can optionally output additional columns of information,
+but this takes longer to calculate. Most (but not all) of these columns are
+included by selecting the extended tabular output. The extra columns are
+included *after* the standard 12 columns. This is so that you can write
+workflow filtering steps that accept either the 12 or 24 column tabular
+BLAST output.
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    13 sallseqid     All subject Seq-id(s), separated by a ';'
+    14 score         Raw score
+    15 nident        Number of identical matches
+    16 positive      Number of positive-scoring matches
+    17 gaps          Total number of gaps
+    18 ppos          Percentage of positive-scoring matches
+    19 qframe        Query frame
+    20 sframe        Subject frame
+    21 qseq          Aligned part of query sequence
+    22 sseq          Aligned part of subject sequence
+    23 qlen          Query sequence length
+    24 slen          Subject sequence length
+====== ============= ===========================================
+
+Very slight modifications were made to the "BLAST XML to tabular" tool that
+ships with Galaxy to output two more column columns:
+
+====== ============= ===========================================
+Column NCBI name     Description
+------ ------------- -------------------------------------------
+    25 pcov          Percentage coverage
+    26 sallseqdescr  All subject Seq-descr(s), separated by a ','
+====== ============= ===========================================
+
+----
+
+.. class:: infomark
+
+This is a slightly modified version of a tool that ships with Galaxy.
+If the 12 or 24 columns formats are desired, use the original tool.
+
+.. class:: warningmark
+
+Beware that the XML file (and thus the conversion) and the tabular output
+direct from BLAST+ may differ in the presence of XXXX masking on regions
+low complexity (columns 21 and 22), and thus also calculated figures like
+the percentage identity (column 3).
+
+    </help>
+</tool>