changeset 6:20c20bcdec36 draft

planemo upload
author jowong
date Tue, 13 Nov 2018 02:50:56 -0500
parents f1b3a263dd3a
children 1bf76073c034
files spotyping.xml
diffstat 1 files changed, 82 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/spotyping.xml	Mon Nov 05 11:10:46 2018 -0500
+++ b/spotyping.xml	Tue Nov 13 02:50:56 2018 -0500
@@ -1,31 +1,42 @@
-<tool id="spotyping" name="Spoligotype Prediction" version="0.1.4">
+<tool id="spotyping" name="Spoligotype Prediction" version="0.1.5">
+    <description>fast and accurate in silico Mycobacterium spoligotyping from sequence reads</description>
     <requirements>
         <requirement type="package" version="2.1">spotyping</requirement>
     </requirements>
     <command detect_errors="aggressive"><![CDATA[
-        SpoTyping.py 
+        SpoTyping.py
+        $advanced.seq
+        $advanced.swift
+        --min=$advanced.min
+        --rmin=$advanced.min_relax
         #if str( $data_input.data_selector ) == "paired"       
             $data_input.input1.forward $data_input.input1.reverse
         #end if
         #if str( $data_input.data_selector ) == "single"       
-            $data_input.input2.element_identifier
+            $data_input.input2
         #end if
         && cp SITVIT_ONLINE.*.xls spotyping.xls
         ]]>
     </command>
     <inputs>
         <conditional name="data_input">
-          <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset">
-            <option value="paired">Paired</option>
-            <option value="single">Single</option>
-          </param>
-          <when value="paired">
-            <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/>
-          </when>
-          <when value="single">
-            <param name="input2" format="data" type="data" label="input" help="Specify dataset with single reads"/>
-          </when>
+            <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset">
+                <option value="paired">Paired</option>
+                <option value="single">Single</option>
+            </param>
+            <when value="paired">
+                <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/>
+            </when>
+            <when value="single">
+                <param name="input2" format="data" type="data" label="input" help="Specify dataset with single reads"/>
+            </when>
         </conditional>
+        <section name="advanced" title="Advanced options" expanded="false">
+            <param type="boolean" argument="--seq" label="Input is assembled sequence" help="Input is either a complete genomic sequence or assembled contigs from an isolate" truevalue="--seq" falsevalue="" checked="false" />
+            <param type="boolean" argument="--swift" label="Swift mode" checked="true" truevalue="--swift=on" falsevalue="--swift=off" />
+            <param name="min" type="integer" value="5" label="MIN" help="minimum number of error-free hits to support presence of a spacer" />
+            <param name="min_relax" type="integer" value="6" label="MIN RELAX" help="minimum number of 1-error-tolerant hits to support presence of a spacer " />
+        </section>
     </inputs>
     <outputs>
         <data name="output1" label="spoligotyping results" format="txt" from_work_dir="SpoTyping"/>
@@ -33,7 +44,63 @@
         <data name="output3" label="query" format="excel.xls" from_work_dir="spotyping.xls"/>
     </outputs>
     <help><![CDATA[
-        Usage: python SpoTyping.py [options] FASTQ_1/FASTA FASTQ_2(optional) 
+This is a modified version of IUC's wrapper of spotyping without the concatenation and renaming of input files. The wrapper also runs properly when supplied with paired-end reads.
+
+            SpoTyping_ is a software for predicting spoligotype_ from sequencing reads, complete genomic sequences and assembled contigs.
+
+    **Input:**
+
+    - Fastq file - if paired end data is used, you may choose to concatenate paired reads into a single input (e.g. using the cat tool)
+    - Fasta file of a complete genomic sequence or assembled contigs of an isolate (with --seq option)
+
+    *Note on input size*: In swift mode the sampling threshold is reached at approximately 30x coverage when using
+    paired-end sequencing of a *M. tuberculosis* genome.
+
+    **Output:**
+
+    Count of hits from BLAST result for each spacer sequence and predicted spoligotype in the format of binary code and octal code.
+
+    **Options:**
+
+    \--noQuery
+    Avoid querying the SITVIT_ online service to describe the prevalence of the reported spoligotype.
+
+    \--seq
+    Set this if input is a fasta file that contains only complete genomic sequence or assembled contigs from an isolate. [Default is off]
+
+    \-s SWIFT, --swift=SWIFT
+    Swift mode, either "on" or "off" [Default: on] - swift mode samples 250 million bases to use for spoligotyping
+
+    \--sorted
+    Set if input reads are sorted relative to positions on a reference genome. If reads are sorted and swift mode is used, swift mode's sampling is adjusted
+    to sample reads across positions in the genome evenly.
+
+    \--filter
+    Filter reads such that:
+
+    1. Leading and trailing 'N's would be removed.
+    2. Any read with more than 3 'N's in the middle would be removed.
+    3. Any read with more than 7 consecutive bases identical would be trimmed/filtered out given
+       the length of the flanking regions.
+
+    **Got weird spoligotype prediction?**
+
+    If sequencing throughput is very low (<40 Mbp, for example), SpoTyping may not be able to give an accurate prediction because of the relatively low read depth.
+
+    **Interpreting the spoligotype**
+
+    The binary or octal spoligotype can be used to look up lineage information using a service
+    like `TB Lineage`_.
+
+    **SITVIT reports**
+
+    Optionally a report on the detected spoligotype can be retrieved from the SITVIT_ database. If such a report is requested it can also be
+    illustrated as a (PDF format) plot.
+
+  .. _SpoTyping: https://github.com/xiaeryu/SpoTyping
+  .. _spoligotype: https://www.ncbi.nlm.nih.gov/pubmed/19521871
+  .. _TB Lineage: http://tbinsight.cs.rpi.edu/run_tb_lineage.html
+  .. _SITVIT: http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE/ 
     ]]></help>
     <citations>
         <citation type="bibtex">
@@ -45,5 +112,6 @@
   journal = {GitHub repository},
   url = {https://github.com/xiaeryu/SpoTyping},
 }</citation>
+        <citation type="doi">10.1186/s13073-016-0270-7</citation>
     </citations>
 </tool>