view spotyping.xml @ 9:c3ae0772ef96 draft

planemo upload
author jowong
date Tue, 13 Nov 2018 03:18:19 -0500
parents ff5b31cdc6ba
children 89e001abff91
line wrap: on
line source

<tool id="spotyping" name="Spoligotype Prediction" version="1.0.1">
    <!-- Short summary of what the tool does. -->
    <description>fast and accurate in silico Mycobacterium spoligotyping from sequence reads</description>
    <!-- Runtime dependency: the "spotyping" package, version 2.1. It is expected to provide the
         SpoTyping.py script that the command template invokes. -->
    <requirements>
        <requirement type="package" version="2.1">spotyping</requirement>
    </requirements>
    <command detect_errors="aggressive"><![CDATA[
        ## Run SpoTyping on the selected input. The two boolean parameters expand
        ## to their own flag text (--seq or nothing, --swift=on or --swift=off).
        SpoTyping.py
            $advanced.seq
            $advanced.swift
            --min=$advanced.min
            --rmin=$advanced.min_relax
            ## Dataset paths are single-quoted so that the shell hands each one to
            ## SpoTyping.py as exactly one argument, whatever characters it contains.
            #if str($data_input.data_selector) == "paired"
                '$data_input.input1.forward' '$data_input.input1.reverse'
            #else
                '$data_input.input2'
            #end if
        ## Copy the report matching SITVIT_ONLINE.*.xls to the fixed file name
        ## that the "sitvit_database_query" output is collected from.
        && cp SITVIT_ONLINE.*.xls spotyping.xls
    ]]></command>
    <inputs>
        <!-- Selects the read layout; the command template branches on "data_selector". -->
        <conditional name="data_input">
            <param name="data_selector" type="select" label="Single or Paired-end Data" help="Select between paired and single end data to add name to dataset">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <!-- Restricted to paired collections: the command template reads the
                     "forward" and "reverse" elements, which only a paired collection has. -->
                <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="a paired data"/>
            </when>
            <when value="single">
                <!-- A single dataset: the command template passes this value to
                     SpoTyping.py as one file argument. -->
                <param name="input2" format="data" type="data" label="input" help="Specify dataset with single reads"/>
            </when>
        </conditional>
        <!-- Tuning options; each one maps onto a SpoTyping.py command-line option. -->
        <section name="advanced" title="Advanced options" expanded="false">
            <!-- Emits "--seq" when checked and nothing when unchecked. -->
            <param type="boolean" argument="--seq" label="Input is assembled sequence" help="Input is either a complete genomic sequence or assembled contigs from an isolate" truevalue="--seq" falsevalue="" checked="false" />
            <!-- Always emits an explicit value: "--swift=on" (default) or "--swift=off". -->
            <param type="boolean" argument="--swift" label="Swift mode" checked="true" truevalue="--swift=on" falsevalue="--swift=off" />
            <!-- Passed to the command as "--min=<value>". -->
            <param name="min" type="integer" value="5" label="MIN" help="minimum number of error-free hits to support presence of a spacer" />
            <!-- Passed to the command as "--rmin=<value>". -->
            <param name="min_relax" type="integer" value="6" label="MIN RELAX" help="minimum number of 1-error-tolerant hits to support presence of a spacer " />
        </section>
    </inputs>
    <outputs>
        <!-- Prediction result, collected from the file "SpoTyping" in the job working directory. -->
        <data name="spotyping_results"
              format="txt"
              label="spoligotyping results"
              from_work_dir="SpoTyping"/>
        <!-- Run log, collected from "SpoTyping.log" in the job working directory. -->
        <data name="spotyping_log"
              format="txt"
              label="spoligotyping log"
              from_work_dir="SpoTyping.log"/>
        <!-- SITVIT report, collected from "spotyping.xls", the copy made by the final "cp" step of the command. -->
        <data name="sitvit_database_query"
              format="excel.xls"
              label="query"
              from_work_dir="spotyping.xls"/>
    </outputs>
    <help><![CDATA[
This is a modified version of IUC's wrapper of spotyping without the concatenation and renaming of the input files. The wrapper also runs properly when supplied with paired-end reads.

SpoTyping_ is a software for predicting spoligotype_ from sequencing reads, complete genomic sequences and assembled contigs.

**Input:**

- Fastq file - paired-end reads are supplied as a paired collection (select *Paired*); single-end reads are supplied with the *Single* option. Concatenating paired reads into a single input (e.g. using the cat tool) is also possible
- Fasta file of a complete genomic sequence or assembled contigs of an isolate (with --seq option)

*Note on input size*: In swift mode the sampling threshold is reached in approximately 30x coverage when using
paired end sequencing of a *M. tuberculosis* genome.

**Output:**

Count of hits from BLAST result for each spacer sequence and predicted spoligotype in the format of binary code and octal code.

**Options:**

\--seq
    Set this if input is a fasta file that contains only complete genomic sequence or assembled contigs from an isolate. [Default is off]

\-s SWIFT, --swift=SWIFT
    Swift mode, either "on" or "off" [Default: on] - swift mode samples 250 million bases to use for spoligotyping

\-m MIN, --min=MIN
    minimum number of error-free hits to support presence of a spacer [Default: 5]

\-r MIN_RELAX, --rmin=MIN_RELAX
    minimum number of 1-error-tolerant hits to support presence of a spacer [Default: 6].

**Got weird spoligotype prediction?**

Sequencing throughput is very low (<40Mbp, for example): SpoTyping may not be able to give accurate prediction due to the relatively low read depth.

**Interpreting the spoligotype**

The binary or octal spoligotype can be used to look up lineage information using a service
like `TB Lineage`_.

**SITVIT reports**

A report on the detected spoligotype is retrieved from the SITVIT_ database and returned as an Excel (xls) dataset.

.. _SpoTyping: https://github.com/xiaeryu/SpoTyping
.. _spoligotype: https://www.ncbi.nlm.nih.gov/pubmed/19521871
.. _TB Lineage: http://tbinsight.cs.rpi.edu/run_tb_lineage.html
.. _SITVIT: http://www.pasteur-guadeloupe.fr:8081/SITVIT_ONLINE/
    ]]></help>
    <citations>
        <!-- Software reference: the SpoTyping source repository on GitHub. -->
        <citation type="bibtex">
@misc{githubSpoTyping,
  author = {Xia, Eryu},
  year = {2016},
  title = {SpoTyping},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/xiaeryu/SpoTyping},
}</citation>
        <!-- DOI reference. NOTE(review): presumably the SpoTyping publication; confirm that the DOI resolves to it. -->
        <citation type="doi">10.1186/s13073-016-0270-7</citation>
    </citations>
</tool>