view format_cd_hit_output.xml @ 0:bbd903996900 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit ffb68b2ddd94854a34a2533105f7bc08884c6e38-dirty
author bebatut
date Wed, 27 Jan 2016 03:28:42 -0500
parents
children 4ba41bcee051
line wrap: on
line source

<tool id="format_cd_hit_output" name="Format cd-hit outputs" version="0.1.0">
    <!-- No software dependencies are declared for this tool.
         NOTE(review): the command section runs a Python script; confirm
         whether a python requirement should be declared here. -->
    <requirements />

    <!-- Treat every exit code of 1 or greater as a failed job. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!-- Reports the interpreter version. The long option needs two leading
         dashes; the single-dash spelling is rejected by python as an unknown
         option. -->
    <version_command><![CDATA[python --version]]></version_command>

    <!-- Builds the command line for format_cd_hit_output.py.
         The cluster information file is always passed. Each optional group of
         arguments is emitted only when the boolean "test" parameter of the
         matching conditional is enabled. Every substituted value is
         single-quoted so that paths containing spaces or shell metacharacters
         reach the script as a single argument. -->
    <command><![CDATA[
        python '$__tool_directory__/format_cd_hit_output.py'
            --input_cluster_info '$input_cluster_info'

            #if $rename_representative_sequences.test
                --input_representative_sequences '$rename_representative_sequences.input_representative_sequences'
                --output_representative_sequences '$output_representative_sequences'
            #end if

            #if $extract_category_distribution.test
                --input_mapping '$extract_category_distribution.input_mapping'
                --output_category_distribution '$output_category_distribution'
                --number_sum '$extract_category_distribution.number_sum'
            #end if
    ]]>
    </command>

    <!-- Tool parameters. Label and help values are kept on a single line each:
         a line break inside an attribute value is turned into spaces by the XML
         parser, which would leave runs of indentation inside the displayed
         text. -->
    <inputs>
        <!-- Cluster information file; always required. -->
        <param name="input_cluster_info" type="data" format="txt" label="Cluster info"/>

        <!-- Optional step: rename the representative sequences. The FASTA
             input is only requested when the test parameter is enabled. -->
        <conditional name="rename_representative_sequences">
            <param name="test" type="boolean" checked="true"
                label="Rename representative sequences with the corresponding cluster name?"/>
            <when value="true">
                <param name="input_representative_sequences" type="data" format="fasta"
                    label="Representative sequences"/>
            </when>
            <when value="false" />
        </conditional>

        <!-- Optional step: extract the category distribution of each cluster.
             The mapping file and the counting mode are only requested when the
             test parameter is enabled. -->
        <conditional name="extract_category_distribution">
            <param name="test" type="boolean" checked="true"
                label="Extract category distribution of each cluster?"/>
            <when value="true">
                <param name="input_mapping" type="data" format="tabular"
                    label="Mapping file"
                    help="The mapping file is a tabular file with 2 columns. First column contains the sequence names and the second one the corresponding category"/>
                <param name="number_sum" type="boolean" checked="true"
                    label="Sum sequence number for each category?"
                    help="The alternative is the sum of size for sequences in each category (if the size information is available in sequence name)"/>
            </when>
            <when value="false" />
        </conditional>
    </inputs>

    <!-- Each dataset is created only when the boolean test parameter of the
         corresponding conditional is enabled. -->
    <outputs>
        <!-- Renamed representative sequences (FASTA). -->
        <data name="output_representative_sequences"
            format="fasta"
            label="${tool.name} on ${on_string}: Renamed representative sequences">
            <filter>rename_representative_sequences['test']</filter>
        </data>
        <!-- Category distribution per cluster (tabular). -->
        <data name="output_category_distribution"
            format="tabular"
            label="${tool.name} on ${on_string}: Category distribution">
            <filter>extract_category_distribution['test']</filter>
        </data>
    </outputs>

    <tests>
        <!-- Runs the tool with both optional steps enabled and compares both
             outputs with the expected files. Parameters that live inside a
             conditional are set through a nested conditional block, because
             both conditionals own a parameter named "test". The mapping file
             is bound to "input_mapping", the name declared in the inputs
             section. -->
        <test>
            <param name="input_cluster_info" value="input_cluster_info.txt"/>
            <conditional name="rename_representative_sequences">
                <param name="test" value="true"/>
                <param name="input_representative_sequences"
                    value="input_representative_sequences.fasta"/>
            </conditional>
            <conditional name="extract_category_distribution">
                <param name="test" value="true"/>
                <param name="input_mapping" value="input_mapping.txt"/>
                <param name="number_sum" value="true"/>
            </conditional>
            <output name="output_representative_sequences"
                file="output_representative_sequences.fasta"/>
            <output name="output_category_distribution"
                file="output_category_distribution.txt"/>
        </test>
    </tests>

    <help><![CDATA[

**What it does**

This tool formats cd-hit outputs (cluster information and cluster representative 
sequences) to rename the representative sequences with the cluster name and/or 
extract the category distribution of each cluster.

-----

**Inputs**

The tool takes as input:

 - The cd-hit output file with cluster information
 - The cd-hit output file with representative sequences for each cluster (optional)
 - A mapping file in tabular format with first column being the sequence names 
   (corresponding to the ones in cluster information file) and the second column being 
   the corresponding categories (optional)

-----

**Outputs**

The tool generates different outputs depending on the chosen parameters:

 - A file with representative sequences of each cluster named with the cluster name
 - A tabular file with lines corresponding to clusters, columns to categories (and
   one column with the number of sequences in the cluster), and cells to the number 
   of sequences of the given category in the cluster

]]>
    </help>

    <!-- No citations are declared for this tool. -->
    <citations />
</tool>