annotate scythe.xml @ 1:b0276d1141fe default tip

Fix test case
author Jim Johnson <jj@umn.edu>
date Thu, 30 Jan 2014 13:10:12 -0600
parents 08439b004404
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
08439b004404 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="scythe" name="Scythe">
08439b004404 Uploaded
jjohnson
parents:
diff changeset
2 <description>Trimming adapters/contaminants using a Naive Bayesian classifier</description>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
4 <requirement version="0.991">scythe</requirement>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
5 </requirements>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
6 <command>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
7 scythe --quiet -a $adapter_file
08439b004404 Uploaded
jjohnson
parents:
diff changeset
8
08439b004404 Uploaded
jjohnson
parents:
diff changeset
9 #if $input_fastq.ext == "fastq":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
10 -q sanger
08439b004404 Uploaded
jjohnson
parents:
diff changeset
11 #else if $input_fastq.ext == "fastqsanger":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
12 -q sanger
08439b004404 Uploaded
jjohnson
parents:
diff changeset
13 #else if $input_fastq.ext == "fastqillumina":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
14 -q illumina
08439b004404 Uploaded
jjohnson
parents:
diff changeset
15 #else if $input_fastq.ext == "fastqsolexa":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
16 -q solexa
08439b004404 Uploaded
jjohnson
parents:
diff changeset
17 #end if
08439b004404 Uploaded
jjohnson
parents:
diff changeset
18
08439b004404 Uploaded
jjohnson
parents:
diff changeset
19 #if str($add_tag) == "add_tag_true":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
20 -t
08439b004404 Uploaded
jjohnson
parents:
diff changeset
21 #end if
08439b004404 Uploaded
jjohnson
parents:
diff changeset
22
08439b004404 Uploaded
jjohnson
parents:
diff changeset
23 #if str($prior) != "":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
24 -p $prior
08439b004404 Uploaded
jjohnson
parents:
diff changeset
25 #end if
08439b004404 Uploaded
jjohnson
parents:
diff changeset
26
08439b004404 Uploaded
jjohnson
parents:
diff changeset
27 #if str($min_match) != "":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
28 -n $min_match
08439b004404 Uploaded
jjohnson
parents:
diff changeset
29 #end if
08439b004404 Uploaded
jjohnson
parents:
diff changeset
30
08439b004404 Uploaded
jjohnson
parents:
diff changeset
31 #if str($min_keep) != "":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
32 -M $min_keep
08439b004404 Uploaded
jjohnson
parents:
diff changeset
33 #end if
08439b004404 Uploaded
jjohnson
parents:
diff changeset
34
08439b004404 Uploaded
jjohnson
parents:
diff changeset
35 #if str($matches_file) == "matches_file_true":
08439b004404 Uploaded
jjohnson
parents:
diff changeset
36 -m $output_matches
08439b004404 Uploaded
jjohnson
parents:
diff changeset
37 #end if
08439b004404 Uploaded
jjohnson
parents:
diff changeset
38
08439b004404 Uploaded
jjohnson
parents:
diff changeset
39 -o $output_trimmed $input_fastq 2> /dev/null
08439b004404 Uploaded
jjohnson
parents:
diff changeset
40 </command>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
41
08439b004404 Uploaded
jjohnson
parents:
diff changeset
42 <inputs>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
43 <param format="fastq, fastqsanger, fastqillumina, fastqsolexa" name="input_fastq" type="data" optional="false" label="FastQ Reads" help="Note: Scythe will infer the quality type of the file from its datatype. I.e., if the datatype is fastqsanger, then the quality type is sanger. The default is fastqsanger."/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
44
08439b004404 Uploaded
jjohnson
parents:
diff changeset
45 <param format="fasta" name="adapter_file" type="data" optional="false" label="Adapter/Contaminant file (in fasta format)"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
46
08439b004404 Uploaded
jjohnson
parents:
diff changeset
47 <param name="add_tag" type="boolean" checked="false" truevalue="add_tag_true" falsevalue="add_tag_false" label="Add a tag to the header indicating that Scythe cut a sequence?"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
48
08439b004404 Uploaded
jjohnson
parents:
diff changeset
49 <param name="matches_file" type="boolean" checked="false" truevalue="matches_file_true" falsevalue="matches_file_false" label="Also output another file with details about adapter/contaminant matches?"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
50
08439b004404 Uploaded
jjohnson
parents:
diff changeset
51 <param name="prior" value="0.3" type="float" optional="true" label="Prior" help="The prior contamination rate">
08439b004404 Uploaded
jjohnson
parents:
diff changeset
52 <validator type="in_range" min="0" message="Minimum value is 0"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
53 </param>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
54
08439b004404 Uploaded
jjohnson
parents:
diff changeset
55 <param name="min_match" value="5" type="integer" optional="true" label="Smallest length adapter/contaminant to consider">
08439b004404 Uploaded
jjohnson
parents:
diff changeset
56 <validator type="in_range" min="0" message="Minimum value is 0"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
57 </param>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
58
08439b004404 Uploaded
jjohnson
parents:
diff changeset
59 <param name="min_keep" value="35" type="integer" optional="true" label="Filter sequences less than this length (after trimming)">
08439b004404 Uploaded
jjohnson
parents:
diff changeset
60 <validator type="in_range" min="0" message="Minimum value is 0"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
61 </param>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
62 </inputs>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
63
08439b004404 Uploaded
jjohnson
parents:
diff changeset
64 <outputs>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
65 <data format_source="input_fastq" name="output_trimmed" label="Adapter/Contaminant Trimmed FastQ using ${tool.name} on ${on_string}"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
66
08439b004404 Uploaded
jjohnson
parents:
diff changeset
67 <data format="txt" name="output_matches" label="Matches of Adapters/Contaminants using ${tool.name} on ${on_string}">
08439b004404 Uploaded
jjohnson
parents:
diff changeset
68 <filter>(matches_file == True)</filter>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
69 </data>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
70 </outputs>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
71
08439b004404 Uploaded
jjohnson
parents:
diff changeset
72 <stdio>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
73 <exit_code range="1" level="fatal" description="scythe failed" />
08439b004404 Uploaded
jjohnson
parents:
diff changeset
74 </stdio>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
75
08439b004404 Uploaded
jjohnson
parents:
diff changeset
76
08439b004404 Uploaded
jjohnson
parents:
diff changeset
77 <tests>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
78 <test>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
79 <param name="input_fastq" value="reads.fastq" ftype="fastqillumina"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
80 <param name="adapter_file" value="illumina_adapters.fa" ftype="fasta"/>
1
b0276d1141fe Fix test case
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
81 <param name="add_tag" value="False"/>
0
08439b004404 Uploaded
jjohnson
parents:
diff changeset
82 <param name="matches_file" value="True"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
83 <param name="prior" value="0.3"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
84 <param name="min_match" value="5"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
85 <param name="min_keep" value="35"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
86 <output name="output_trimmed" file="trimmed_sequences.fastq" ftype="fastqillumina"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
87 <output name="output_matches" file="matches.txt" ftype="txt"/>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
88 </test>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
89 </tests>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
90
08439b004404 Uploaded
jjohnson
parents:
diff changeset
91 <help>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
92 Scythe uses a Naive Bayesian approach to classify contaminant substrings in sequence reads. It considers quality information, which can make it robust in picking out 3'-end adapters, which often include poor quality bases.
08439b004404 Uploaded
jjohnson
parents:
diff changeset
93
08439b004404 Uploaded
jjohnson
parents:
diff changeset
94 Most next generation sequencing reads have deteriorating quality towards the 3'-end. It's common for a quality-based trimmer to be employed before mapping, assemblies, and analysis to remove these poor quality bases. However, quality-based trimming could remove bases that are helpful in identifying (and removing) 3'-end adapter contaminants. Thus, it is recommended you run Scythe before quality-based trimming, as part of a read quality control pipeline.
08439b004404 Uploaded
jjohnson
parents:
diff changeset
95
08439b004404 Uploaded
jjohnson
parents:
diff changeset
96 The Bayesian approach Scythe uses compares two likelihood models: the probability of seeing the matches in a sequence given contamination, and not given contamination. Given that the read is contaminated, the probability of seeing a certain number of matches and mismatches is a function of the quality of the sequence. Given the read is not contaminated (and is thus assumed to be random sequence), the probability of seeing a certain number of matches and mismatches is chance. The posterior is calculated across both these likelihood models, and the class (contaminated or not contaminated) with the maximum posterior probability is the class selected.
08439b004404 Uploaded
jjohnson
parents:
diff changeset
97
08439b004404 Uploaded
jjohnson
parents:
diff changeset
98 Scythe will infer the quality type from the datatype of the file.
08439b004404 Uploaded
jjohnson
parents:
diff changeset
99 </help>
08439b004404 Uploaded
jjohnson
parents:
diff changeset
100
08439b004404 Uploaded
jjohnson
parents:
diff changeset
101 </tool>