comparison pear.xml @ 0:b0e3a2df7897 draft

Uploaded
author iuc
date Wed, 11 Feb 2015 07:52:43 -0500
parents
children 361d2cb38860
comparison
equal deleted inserted replaced
-1:000000000000 0:b0e3a2df7897
1 <tool id="iuc_pear" name="Pear" version="0.9.6.0">
2 <description>Paired-End read merger</description>
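3 <!-- Disabled placeholder that appears to be copied from a Bismark wrapper; replace it with a PEAR version command before enabling: <version_command>bismark version</version_command> -->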
<!-- External dependency: the "pear" package, pinned to release 0.9.6. -->
<requirements>
    <requirement
        type="package"
        version="0.9.6">pear</requirement>
</requirements>
<!--
    Error detection rules for the job.
    The two exit_code ranges together cover every non-zero exit status
    (1 and above, -1 and below). The two regex rules match the literal
    markers "Error:" and "Exception:" in the tool's output streams.
    NOTE(review): severity and the streams scanned rely on Galaxy's defaults
    for these elements; confirm against the tool XML schema if they matter.
-->
<stdio>
    <!-- Any positive exit status. -->
    <exit_code range="1:"/>
    <!-- Any negative exit status. -->
    <exit_code range=":-1"/>
    <!-- Textual error markers. -->
    <regex match="Error:"/>
    <regex match="Exception:"/>
</stdio>
<command>
<![CDATA[
    ## Runs PEAR on the selected forward/reverse read files. All result files
    ## are written to the job working directory with the prefix "pear"
    ## (pear.assembled.fastq, pear.unassembled.forward.fastq, ...), which is
    ## where the <outputs> section picks them up via from_work_dir.
    pear
        ## Dataset paths are quoted so that unusual characters cannot split them.
        -f "$forward"
        -r "$reverse"
        --output pear
        --p-value $pvalue
        --min-overlap $min_overlap
        ## 0 means "no upper limit", so the option is only passed for positive
        ## values. The short forms -m / -n / -q are used because they are the
        ## spellings documented in the help section of this tool.
        #if int($max_assembly_length) > 0:
            -m $max_assembly_length
        #end if
        -n $min_assembly_length
        --min-trim-length $min_trim_length
        -q $quality_threshold
        --max-uncalled-base $max_uncalled_base
        --test-method $test_method
        ## Bare switch: the boolean parameter renders as "-e" when checked and
        ## as an empty string otherwise. It must not be prefixed with the long
        ## option name, or the switch would be passed on every run.
        $empirical_freqs
        ## Use the number of cores Galaxy allotted to the job, defaulting to 8.
        -j "\${GALAXY_SLOTS:-8}"
        --score-method $score_method
        --cap $cap
        ## Renders as the nbase switch when checked, empty otherwise.
        $nbase
]]>
</command>
<inputs>
    <!-- Paired-end input: one FASTQ dataset per mate. -->
    <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq"
           label="Specify the name of file that contains the forward paired-end reads"
           help="-f" />
    <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq"
           label="Specify the name of file that contains the reverse paired-end reads"
           help="-r" />

    <!--
        Tuning parameters. Each value is written into the command line
        unconditionally, so none of them may be left empty: they are required
        fields that start out at the defaults listed in the help section.
    -->
    <param name="pvalue" type="float" value="0.01" min="0" max="1"
           label="Specify a p-value for the statistical test"
           help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables the test. (--p-value)" />

    <param name="min_overlap" type="integer" value="10"
           label="Specify the minimum overlap size"
           help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)" />

    <!-- 0 disables the limit; the command template omits the option in that case. -->
    <param name="max_assembly_length" type="integer" value="0"
           label="Specify the maximum possible length of the assembled sequences"
           help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)" />

    <param name="min_assembly_length" type="integer" value="50"
           label="Specify the minimum possible length of the assembled sequences"
           help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short. (--min-assembly-length)" />

    <param name="min_trim_length" type="integer" value="1"
           label="Specify the minimum length of reads after trimming the low quality part"
           help="See option -q. (--min-trim-length)" />

    <param name="quality_threshold" type="integer" value="0"
           label="Specify the quality score threshold for trimming the low quality part of a read"
           help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" />

    <param name="max_uncalled_base" type="float" value="1.0" min="0" max="1"
           label="Specify the maximal proportion of uncalled bases in a read"
           help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases. (--max-uncalled-base)" />

    <param name="cap" type="integer" value="40"
           label="Specify the upper bound for the resulting quality score"
           help="If set to zero, capping is disabled. (--cap)" />

    <param name="test_method" type="select" label="Specify the type of statistical test" help="(--test-method)">
        <option value="1" selected="True">Given the minimum allowed overlap, test using the highest OES (1)</option>
        <option value="2">Use the acceptance probability (2)</option>
    </param>

    <!--
        Switches: each renders as its flag when checked and as an empty string
        when unchecked, so the command template inserts the variable on its own.
    -->
    <param name="empirical_freqs" type="boolean" truevalue="-e" falsevalue="" checked="false"
           label="Disable empirical base frequencies" help="(--empirical-freqs)" />
    <param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false"
           label="Use N base if uncertain"
           help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)" />

    <param name="score_method" type="select" label="Specify the scoring method" help="(--score-method)">
        <option value="1">OES with +1 for match and -1 for mismatch</option>
        <option value="2" selected="True">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option>
        <option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option>
    </param>

    <!--
        Selects which result files become history datasets; the filters in the
        outputs section test membership in this list. At least one must be chosen.
    -->
    <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output files">
        <option value="assembled" selected="True">Assembled reads</option>
        <option value="forward">Forward unassembled reads</option>
        <option value="reverse">Reverse unassembled reads</option>
        <option value="discarded">Discarded reads</option>

        <validator type="no_options" message="Please select at least one output file." />
    </param>
</inputs>
<!--
    One FASTQ dataset per result file found in the job working directory.
    Each dataset is only created when its key was ticked in the "outputs"
    checkbox parameter (see the filter expressions).
-->
<outputs>
    <!-- Successfully merged read pairs. -->
    <data name="assembled_reads"
          format="fastq"
          label="${tool.name} on ${on_string}: Assembled reads"
          from_work_dir="pear.assembled.fastq">
        <filter>'assembled' in outputs</filter>
    </data>

    <!-- Forward mates of pairs that were not merged. -->
    <data name="unassembled_forward_reads"
          format="fastq"
          label="${tool.name} on ${on_string}: Unassembled forward reads"
          from_work_dir="pear.unassembled.forward.fastq">
        <filter>'forward' in outputs</filter>
    </data>

    <!-- Reverse mates of pairs that were not merged. -->
    <data name="unassembled_reverse_reads"
          format="fastq"
          label="${tool.name} on ${on_string}: Unassembled reverse reads"
          from_work_dir="pear.unassembled.reverse.fastq">
        <filter>'reverse' in outputs</filter>
    </data>

    <!-- Reads that were discarded. -->
    <data name="discarded_reads"
          format="fastq"
          label="${tool.name} on ${on_string}: Discarded reads"
          from_work_dir="pear.discarded.fastq">
        <filter>'discarded' in outputs</filter>
    </data>
</outputs>
<!--
    Functional test: merges forward.fastq / reverse.fastq with quality capping
    disabled (cap = 0), requests the assembled and forward-unassembled outputs,
    and compares both against the stored reference files.
-->
<tests>
    <test>
        <!-- Input datasets. -->
        <param name="forward" ftype="fastq" value="forward.fastq"/>
        <param name="reverse" ftype="fastq" value="reverse.fastq"/>

        <!-- Parameter values used for this run. -->
        <param name="min_overlap" value="10"/>
        <param name="min_assembly_length" value="50"/>
        <param name="cap" value="0"/>
        <param name="outputs" value="assembled,forward"/>

        <!-- Expected results. -->
        <output name="assembled_reads" ftype="fastq" file="pear_assembled_results1.fastq"/>
        <output name="unassembled_forward_reads" ftype="fastq" file="pear_unassembled_forward_results1.fastq"/>
    </test>
</tests>
118 <help>
119 <![CDATA[
120
121 **What it does**
122
123 PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger.
124 It is fully parallelized and can run with as low as just a few kilobytes of memory.
125
126
127 **Parameters**
128
129
130 Standard (mandatory):
131 -f, --forward-fastq <str> Forward paired-end FASTQ file.
132 -r, --reverse-fastq <str> Reverse paired-end FASTQ file.
133
134 Optional:
135 -p, --p-value <float> Specify a p-value for the statistical test. If the computed
136 p-value of a possible assembly exceeds the specified p-value
137 then paired-end read will not be assembled. Valid options
138 are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables
139 the test. (default: 0.01)
140 -v, --min-overlap <int> Specify the minimum overlap size. The minimum overlap may be
141 set to 1 when the statistical test is used. However, further
142 restricting the minimum overlap size to a proper value may
143 reduce false-positive assembles. (default: 10)
144 -m, --max-assembly-length <int> Specify the maximum possible length of the assembled
145 sequences. Setting this value to 0 disables the restriction
146 and assembled sequences may be arbitrary long. (default: 0)
147 -n, --min-assembly-length <int> Specify the minimum possible length of the assembled
148 sequences. Setting this value to 0 disables the restriction
149 and assembled sequences may be arbitrary short. (default:
150 50)
151 -t, --min-trim-length <int> Specify the minimum length of reads after trimming the low
152 quality part (see option -q). (default: 1)
153 -q, --quality-threshold <int> Specify the quality score threshold for trimming the low
154 quality part of a read. If the quality scores of two
155 consecutive bases are strictly less than the specified
156 threshold, the rest of the read will be trimmed. (default:
157 0)
158 -u, --max-uncalled-base <float> Specify the maximal proportion of uncalled bases in a read.
159 Setting this value to 0 will cause PEAR to discard all reads
160 containing uncalled bases. The other extreme setting is 1
161 which causes PEAR to process all reads independent of the
162 number of uncalled bases. (default: 1)
163 -g, --test-method <int> Specify the type of statistical test. Two options are
164 available. (default: 1)
165 1: Given the minimum allowed overlap, test using the highest
166 OES. Note that due to its discrete nature, this test usually
167 yields a lower p-value for the assembled read than the cut-
168 off (specified by -p). For example, setting the cut-off to
169 0.05 using this test, the assembled reads might have an
170 actual p-value of 0.02.
171
172 2: Use the acceptance probability (m.a.p). This test method
173 computes the same probability as test method 1. However, it
174 assumes that the minimal overlap is the observed overlap
175 with the highest OES, instead of the one specified by -v.
176 Therefore, this is not a valid statistical test and the
177 'p-value' is in fact the maximal probability for accepting
178 the assembly. Nevertheless, we observed in practice that for
179 the case the actual overlap sizes are relatively small, test
180 2 can correctly assemble more reads with only slightly
181 higher false-positive rate.
182 -e, --empirical-freqs Disable empirical base frequencies. (default: use empirical
183 base frequencies)
184 -s, --score-method <int> Specify the scoring method. (default: 2)
185 1: OES with +1 for match and -1 for mismatch.
186 2: Assembly score (AS). Use +1 for match and -1 for mismatch
187 multiplied by base quality scores.
188 3: Ignore quality scores and use +1 for a match and -1 for a
189 mismatch.
190 -c, --cap <int> Specify the upper bound for the resulting quality score. If
191 set to zero, capping is disabled. (default: 40)
192 -z, --nbase When merging a base-pair that consists of two non-equal
193 bases out of which none is degenerate, set the merged base
194 to N and use the highest quality score of the two bases
195
196
197
198
199 ]]>
200 </help>
<!-- Publication to cite for this tool, referenced by DOI. -->
<citations>
    <citation type="doi">10.1093/bioinformatics/btt593</citation>
</citations>
204 </tool>