|
2
|
1 <tool id="PHYLO_filter" name="Filter and trim" version="1.3.0">
|
|
|
2 <description>sequences</description>
|
|
|
<!-- Software dependencies Galaxy must resolve before the tool can run:
     a single package-type requirement named yapp_env. -->
<requirements>
    <requirement type="package">yapp_env</requirement>
</requirements>
|
|
|
<!-- Shared macro definitions are imported from macros.xml.
     NOTE(review): presumably this file defines the basic_errors macro that
     the stdio section expands; confirm against macros.xml. -->
<macros>
    <import>macros.xml</import>
</macros>
|
|
|
<!-- Command whose output Galaxy stores as the version of the wrapped program (seqmagick). -->
<version_command>seqmagick --version</version_command>
|
|
|
<!--
  Runs filter-wrapper.sh, which ships in the same directory as this tool
  file, and passes it the path of the generated "config" configfile as its
  only argument.

  The deprecated interpreter attribute is replaced by an explicit bash call
  that locates the script through $__tool_directory__. Both paths are
  single-quoted so that a directory name containing spaces cannot split the
  arguments. Keep this comment outside the command element: Galaxy reads the
  element text, and a comment placed inside would truncate it.
-->
<command><![CDATA[
    bash '$__tool_directory__/filter-wrapper.sh' '${config}'
]]></command>
|
|
|
<!-- Exit-code / output error handling is delegated to the basic_errors macro. -->
<stdio>
    <expand macro="basic_errors"/>
</stdio>
|
|
|
<inputs>
    <!-- TODO: can take either fasta+qual or fastq -->

    <!-- Plate and zone identifiers, both defaulting to 1.
         NOTE(review): neither value is referenced by the configfile defined
         in this tool file; confirm how (or whether) they are consumed. -->
    <param name="plate_id" type="integer" value="1" label="Plate number"/>
    <param name="zone_id" type="integer" value="1" label="Zone number"/>

    <!-- Raw reads and their matching per-base quality scores. -->
    <param name="raw_seqs" type="data" format="fasta" label="Unfiltered sequences"/>
    <param name="input_qual" type="data" format="qual" label="Sequence quality data"/>

    <!-- TODO: handle MID format for multi-sample sequencing; see http://qiime.org/scripts/split_libraries.html -->
    <param name="barcodes" type="data" format="csv" label="Barcodes"/>

    <!-- Primer sequence, entered in a one-row, 40-column text area. -->
    <param name="primer"
           type="text"
           label="Primer"
           value="GCGGACTACCVGGGTATCTAAT"
           area="True"
           size="1x40"/>

    <!-- Filtering thresholds; the defaults match the table in the help text. -->
    <param name="min_length"
           type="integer"
           min="100"
           max="1000"
           value="350"
           label="Minimum sequence length"/>
    <param name="min_quality"
           type="integer"
           min="0"
           max="63"
           value="35"
           label="Minimum mean sequence quality"/>

    <!-- Rendered as the literal strings TRUE / FALSE when substituted. -->
    <param name="reverse_complement"
           type="boolean"
           truevalue="TRUE"
           falsevalue="FALSE"
           label="Reads uniformly correspond to negative strands"/>
</inputs>
|
|
|
<outputs>
    <!-- Reads that passed filtering. -->
    <data name="filtered_seqs" format="fasta" label="Filtered sequences"/>

    <!-- Filtering report (tabular) and filtering details (generic "data" format). -->
    <data name="filter_report" format="tabular" label="Filtering report"/>
    <data name="filter_details" format="data" label="Filtering details"/>

    <!-- CSV mapping each read to its specimen. -->
    <data name="split_map" format="csv" label="Read-to-specimen map"/>
</outputs>
|
|
|
<configfiles>
    <!-- NAME="value" settings file whose path is passed to filter-wrapper.sh
         as ${config}: input parameters first, then output dataset paths.
         Do not place comments inside the configfile element; its text is
         the file content. -->
    <configfile name="config"><![CDATA[
RAW_SEQS="${raw_seqs}"
INPUT_QUAL="${input_qual}"
BARCODES="${barcodes}"
PRIMER="${primer}"
MIN_LENGTH="${min_length}"
MIN_QUALITY="${min_quality}"
REVERSE_COMPLEMENT="${reverse_complement}"

FILTERED_SEQS="${filtered_seqs}"
FILTER_REPORT="${filter_report}"
FILTER_DETAILS="${filter_details}"
SPLIT_MAP="${split_map}"
]]></configfile>
</configfiles>
|
|
|
<!-- The contents of the help tag are parsed as reStructuredText. Please see
     help-template.rst for examples of commonly-used sections in other Galaxy
     tools. -->
|
|
|
54 <help>
|
|
|
55
|
|
|
56 .. class:: infomark
|
|
|
57
|
|
|
58 **What it does**
|
|
|
59
|
|
|
This tool truncates and removes sequences that do not match a set of quality
criteria, and maps sequence barcodes to specimens. It takes input sequences in
FASTA format, a quality file, and a CSV file of barcodes, and outputs the
filtered sequences, a filtering report, filtering details, and a
read-to-specimen map.
|
|
|
64
|
|
|
65 The default quality filter settings are:
|
|
|
66
|
|
|
+---------------------------+------+
|parameter                  |value |
+===========================+======+
|--min-length               |350   |
+---------------------------+------+
|--min-mean-quality         |35    |
+---------------------------+------+
|--quality-window           |30    |
+---------------------------+------+
|--quality-window-prop      |0.9   |
+---------------------------+------+
|--quality-window-mean-qual |15    |
+---------------------------+------+
|
|
|
80
|
|
|
81 See seqmagick's `quality filter documentation`_ for full explanations of these
|
|
|
82 parameters.
|
|
|
83
|
|
|
84 .. _quality filter documentation: http://fhcrc.github.io/seqmagick/quality_filter.html
|
|
|
85
|
|
|
86 </help>
|
|
|
87 </tool>
|