<!--
view split_libraries_fastq.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
line wrap: on
line source
-->

<tool id="split_libraries_fastq" name="Split fastq libraries" version="1.9.1">
    <description>to perform demultiplexing of Fastq sequence data</description>

    <!-- Pull in the shared macro definitions from macros.xml (not shown in
         this file); the expand and citations elements below rely on them. -->
    <macros>
        <import>macros.xml</import>
    </macros>

    <!-- Expands the "requirements" macro; presumably this declares the
         software dependencies of the tool (confirm in macros.xml). -->
    <expand macro="requirements" />

    <!-- Invokes the wrapped script with its version flag. Galaxy is expected
         to record the printed output as the version of the underlying
         program (standard version_command semantics). -->
    <version_command><![CDATA[
        split_libraries_fastq.py --version
    ]]>
    </version_command>

    <command><![CDATA[
        ## Demultiplex the input reads. Results are written to the
        ## split_libraries directory.
        split_libraries_fastq.py

            ## Join the selected read files into one comma-separated value.
            #set $seq_files = ''
            #set $sep = ''
            #for $file in $input_fastq_files
                #set $seq_files += $sep + str($file)
                #set $sep = ','
            #end for
            -i '$seq_files'

            -o split_libraries

            ## Mapping files are an optional input: skip unset entries and
            ## emit -m only when at least one file was supplied, so the
            ## option is never left without a value.
            #set $mapping_files = ''
            #set $sep = ''
            #for $file in $input_mapping_files
                #if $file
                    #set $mapping_files += $sep + str($file)
                    #set $sep = ','
                #end if
            #end for
            #if $mapping_files
                -m '$mapping_files'
            #end if

            ## Barcode read files are optional as well (for example when the
            ## data is not barcoded); same handling as the mapping files.
            #set $barcode_files = ''
            #set $sep = ''
            #for $file in $input_files_barcode_read_fps
                #if $file
                    #set $barcode_files += $sep + str($file)
                    #set $sep = ','
                #end if
            #end for
            #if $barcode_files
                -b '$barcode_files'
            #end if

            ## Boolean parameters expand to their flag or to nothing.
            $store_qual_scores

            #if str($sample_ids)
                --sample_ids '$sample_ids'
            #end if

            $store_demultiplexed_fastq
            $retain_unassigned_reads
            -r $max_bad_run_length
            -p $min_per_read_length_fraction
            -n $sequence_max_n
            -s $start_seq_id

            $rev_comp_barcode
            $rev_comp_mapping_barcodes
            $rev_comp

            -q $phred_quality_threshold

            ## A custom barcode type is passed as its numeric length; every
            ## other choice is passed by name.
            #if str($barcode_type.barcode_selector) != "custom_length"
                --barcode_type $barcode_type.barcode_selector
            #else
                --barcode_type $barcode_type.barcode_length
            #end if

            --max_barcode_errors $max_barcode_errors

            ## The select value already holds the full option, or is empty
            ## for automatic detection.
            $phred_offset
]]>
    </command>

	<inputs>
        <param name="input_fastq_files" type="data" 
            format="fastq,fastqsanger,fastqsolexa" 
            label="Input fastq files" multiple="True" help="(-i/--sequence_read_fps)"/> 

		<param name="input_mapping_files" type="data" 
            format="txt,tabular,tsv,csv" label="Metadata mapping files (optional)" 
            multiple="True" optional="True" help="(-m/--mapping_fps)"/> 

        <param name="input_files_barcode_read_fps" type="data" 
            format="fastq,fastqsanger,fastqsolexa" label="Barcode read files (optional)" 
            multiple="True" optional="True" help="(-b/--barcode_read_fps)"/> 

		<param name="store_qual_scores" type="boolean" label="Store quality strings
            in files?" truevalue="--store_qual_scores" falsevalue="" checked="False" 
            help="(--store_qual_scores)" />

        <param name="sample_ids" type="text" label="Comma-separated list of samples 
            ids to be applied to all sequences (optional)" optional="True" 
            help="It must be one per input file path (used when data is not 
            multiplexed, --sample_ids)"/> 

        <param name="store_demultiplexed_fastq" type="boolean" label="Write
            demultiplexed fastq files?" truevalue="--store_demultiplexed_fastq" 
            falsevalue="" checked="False" help="(--store_demultiplexed_fastq)" />

        <param name="retain_unassigned_reads" type="boolean" label="Retain 
            sequences which don’t map to a barcode in the mapping file?" 
            truevalue="--retain_unassigned_reads" falsevalue="" checked="False" 
            help="Sample ID will be 'Unassigned' (--retain_unassigned_reads)" />

        <param name="max_bad_run_length" type="integer" value="3"
            label="Maximum number of consecutive low quality base calls allowed 
            before truncating a read" help="(-r/--max_bad_run_length)" />

        <param name="min_per_read_length_fraction" type="float" value="0.75"
            label="Minimum number of consecutive high quality base calls to 
            include a read (per single end read) as a fraction of the input read 
            length" help="(-p/--min_per_read_length_fraction)" />

        <param name="sequence_max_n" type="integer" value="0"
            label="Maximum number of N characters allowed in a sequence to retain 
            it" help="This is applied after quality trimming, and is total over 
            combined paired end reads if applicable (-n/--sequence_max_n)" />

        <param name="start_seq_id" type="integer" value="0"
            label="Start seq_ids as ascending integers beginning with start_seq_id" 
            help="(-s/--start_seq_id)" />

        <param name="rev_comp_barcode" type="boolean" label="Reverse complement 
            barcode reads before lookup?" truevalue="--rev_comp_barcode" 
            falsevalue="" checked="False" help="(--rev_comp_barcode)" />

        <param name="rev_comp_mapping_barcodes" type="boolean" label="Reverse 
            complement barcode in mapping before lookup?" 
            truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="False" 
            help="It is useful if barcodes in mapping file are reverse 
            complements of golay codes (--rev_comp_mapping_barcodes)" />

        <param name="rev_comp" type="boolean" label="Reverse omplement sequence 
            before writing to output file?" truevalue="--rev_comp" falsevalue="" 
            checked="False" help="(--rev_comp)" />

        <param name="phred_quality_threshold" type="integer" value="3"
            label="Maximum unacceptable Phred quality score" help="E.g., for
            Q20 and better, 19 must be specified (-q/--phred_quality_threshold)" />
		
		<conditional name="barcode_type">
            <param name="barcode_selector" type="select" label="Type of barcode"
                help="(--barcode_type)">
                <option value="hamming_8">hamming_8</option>
                <option value="golay_12" selected="true">golay_12</option>
                <option value="variable_length">variable_length (disable any barcode correction)</option>
                <option value="custom_length">Custom length</option>
                <option value="not-barcoded">Data not barcoded</option>
            </param>
            <when value="hamming_8" />
            <when value="golay_12" />
            <when value="variable_length" />
            <when value="custom_length">
                <param name="barcode_length" type="integer" value="4" 
                label="Barcode length"/>
            </when>
            <when value="not-barcoded" />
        </conditional>

        <param name="max_barcode_errors" type="float" value="1.5"
            label="Maximum number of errors in barcode" 
            help="(--max_barcode_errors)" />

        <param name="phred_offset" type="select" label="Ascii offset to use when 
            decoding phred scores" help="(--phred_offset)">
            <option value="--phred_offset 33">33</option>
            <option value="--phred_offset 64">64</option>
            <option value="" selected="true">Automatically determined</option>
        </param>
    </inputs>

	<outputs>
		<data name="log" format="txt" 
            from_work_dir="split_libraries/split_library_log.txt"
            label="${tool.name} on ${on_string}: log"/>
        <data name="histograms" format="tabular" 
            from_work_dir="split_libraries/histograms.txt"
            label="${tool.name} on ${on_string}: histograms"/>
        <data name="seqs" format="fasta" 
            from_work_dir="split_libraries/seqs.fna"
            label="${tool.name} on ${on_string}: sequences"/>
        <data name="seqs_qual" format="qual" 
            from_work_dir="split_libraries/seqs.qual"
            label="${tool.name} on ${on_string}: sequence qualities">
            <filter>store_qual_scores is True</filter>
        </data>
        <data name="seqs_fastq" format="fastq"
            from_work_dir="split_libraries/seqs.fastq"
            label="${tool.name} on ${on_string}: demultiplexed sequences (fastq)">
            <filter>store_demultiplexed_fastq is True</filter>
        </data>
    </outputs>

    <tests>
        <!-- Single barcoded run with both optional outputs enabled.
             Parameters are listed in the same order as the inputs section. -->
        <test>
            <param name="input_fastq_files" value="forward_reads.fastq"/>
            <param name="input_mapping_files" value="map.tsv"/>
            <param name="input_files_barcode_read_fps" value="barcodes.fastq"/>
            <!-- Boolean parameters are set with true/false rather than with
                 the flag strings. -->
            <param name="store_qual_scores" value="true"/>
            <param name="store_demultiplexed_fastq" value="true"/>
            <param name="retain_unassigned_reads" value="false"/>
            <param name="max_bad_run_length" value="3"/>
            <param name="min_per_read_length_fraction" value="0.75"/>
            <param name="sequence_max_n" value="0"/>
            <param name="start_seq_id" value="0"/>
            <param name="rev_comp_barcode" value="false"/>
            <param name="rev_comp_mapping_barcodes" value="false"/>
            <param name="rev_comp" value="false"/>
            <!-- NOTE(review): this entry used to be a second start_seq_id
                 set to 3. Its position and value match the
                 phred_quality_threshold default, so it is treated as a
                 copy-paste slip; confirm against the expected output files. -->
            <param name="phred_quality_threshold" value="3"/>
            <conditional name="barcode_type">
                <param name="barcode_selector" value="golay_12"/>
            </conditional>
            <param name="max_barcode_errors" value="1.5"/>
            <param name="phred_offset" value=""/>

            <output name="log" file="split_fastq_libraries_log.txt"/>
            <output name="seqs" file="split_fastq_libraries_sequences.fasta"/>
            <output name="histograms" file="split_fastq_libraries_histograms.tabular"/>
            <output name="seqs_qual" file="split_fastq_libraries_sequence_qualities.qual"/>
            <output name="seqs_fastq" file="split_fastq_libraries_demultiplexed_sequences.fastq"/>
        </test>
    </tests>

    <help><![CDATA[

**What it does**

This tool performs demultiplexing of Fastq sequence data where barcodes and sequences are contained in two separate fastq files (common on Illumina runs).

More information about this tool is available in the
`QIIME documentation <http://qiime.org/scripts/split_libraries_fastq.html>`_.
]]>
    </help>

    <!-- Citation entries are supplied by the "citations" macro, which is
         expected to be defined in the imported macros.xml (not shown here). -->
    <citations>
        <expand macro="citations" />
    </citations>

</tool>