<tool id="iuc_pear" name="Pear" version="0.9.6.1">
    <!--
        Galaxy wrapper around the pear paired-end read merger.
        The wrapper revision (last version component) was raised because the generated
        command line changed: the empirical-frequency flag is no longer passed on every run.
    -->
    <description>Paired-End read merger</description>

    <!-- No version_command is defined. The commented-out placeholder that used to sit here
         called "bismark version", which is unrelated to pear. -->

    <requirements>
        <requirement type="package" version="0.9.6">pear</requirement>
    </requirements>

    <!-- A job fails on any non-zero exit code or when the tool output contains
         "Error:" or "Exception:". -->
    <stdio>
        <exit_code range="1:" />
        <exit_code range=":-1" />
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>

    <!--
        Command template notes:
        * Every numeric parameter is declared optional, so each option is emitted only when
          the field holds a value. An empty field then falls back to pear's own default
          instead of producing an option without an argument.
        * $empirical_freqs expands to "-e" or to nothing. It must not be preceded by the long
          flag, otherwise empirical base frequencies are disabled on every run regardless of
          the checkbox.
        * All result files are written with the prefix "pear" and collected through
          from_work_dir in the outputs section.
        * NOTE(review): the long options spelled "max-asm-length", "min-asm-length" and
          "quality-theshold" differ from the names printed in the help section below. They are
          kept exactly as they were; confirm against the option table of pear 0.9.6 before
          renaming any of them.
    -->
    <command>
<![CDATA[
        pear
            -f '$forward'
            -r '$reverse'
            --output pear
            #if str($pvalue) != '':
                --p-value $pvalue
            #end if
            #if str($min_overlap) != '':
                --min-overlap $min_overlap
            #end if
            #if str($max_assembly_length) != '' and int($max_assembly_length) > 0:
                --max-asm-length $max_assembly_length
            #end if
            #if str($min_assembly_length) != '':
                --min-asm-length $min_assembly_length
            #end if
            #if str($min_trim_length) != '':
                --min-trim-length $min_trim_length
            #end if
            #if str($quality_threshold) != '':
                --quality-theshold $quality_threshold
            #end if
            #if str($max_uncalled_base) != '':
                --max-uncalled-base $max_uncalled_base
            #end if
            --test-method $test_method
            $empirical_freqs
            -j "\${GALAXY_SLOTS:-8}"
            --score-method $score_method
            #if str($cap) != '':
                --cap $cap
            #end if
            $nbase
]]>
    </command>

    <inputs>
        <!-- mandatory: the two FASTQ files of the read pair -->
        <param name="forward" type="data" format="fastqillumina,fastqsanger,fastq"
               label="Specify the name of file that contains the forward paired-end reads"
               help="-f" />
        <param name="reverse" type="data" format="fastqillumina,fastqsanger,fastq"
               label="Specify the name of file that contains the reverse paired-end reads"
               help="-r" />

        <!-- optional: leaving a numeric field empty omits the option from the command line -->
        <param name="pvalue" type="float" value="0.01" min="0" max="1" optional="True"
               label="Specify a p-value for the statistical test"
               help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Valid options are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables the test. (--p-value)" />

        <param name="min_overlap" type="integer" value="10" min="0" optional="True"
               label="Specify the minimum overlap size"
               help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assemblies. (--min-overlap)" />

        <param name="max_assembly_length" type="integer" value="0" min="0" optional="True"
               label="Specify the maximum possible length of the assembled sequences"
               help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrarily long. (--max-assembly-length)" />

        <param name="min_assembly_length" type="integer" value="50" min="0" optional="True"
               label="Specify the minimum possible length of the assembled sequences"
               help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrarily short. (--min-assembly-length)" />

        <param name="min_trim_length" type="integer" value="1" min="0" optional="True"
               label="Specify the minimum length of reads after trimming the low quality part"
               help="See option -q. (--min-trim-length)" />

        <param name="quality_threshold" type="integer" value="0" min="0" optional="True"
               label="Specify the quality score threshold for trimming the low quality part of a read"
               help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" />

        <param name="max_uncalled_base" type="float" value="1.0" min="0" max="1" optional="True"
               label="Specify the maximal proportion of uncalled bases in a read"
               help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent of the number of uncalled bases. (--max-uncalled-base)" />

        <param name="cap" type="integer" value="40" min="0" optional="True"
               label="Specify the upper bound for the resulting quality score"
               help="If set to zero, capping is disabled. (--cap)" />

        <param name="test_method" type="select" label="Specify the type of statistical test" help="(--test-method)">
            <option value="1" selected="True">Given the minimum allowed overlap, test using the highest OES (1)</option>
            <option value="2">Use the acceptance probability (2)</option>
        </param>

        <!-- Both booleans expand to their flag when checked and to an empty string otherwise. -->
        <param name="empirical_freqs" type="boolean" truevalue="-e" falsevalue="" checked="false"
               label="Disable empirical base frequencies" help="(--empirical-freqs)" />
        <param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false"
               label="Use N base if uncertain"
               help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)" />

        <param name="score_method" type="select" label="Specify the scoring method" help="(--score-method)">
            <option value="1">OES with +1 for match and -1 for mismatch</option>
            <option value="2" selected="True">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option>
            <option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option>
        </param>

        <!-- Selects which of the four result files become history datasets
             (evaluated by the filter of each output below). -->
        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output files">
            <option value="assembled" selected="True">Assembled reads</option>
            <option value="forward">Forward unassembled reads</option>
            <option value="reverse">Reverse unassembled reads</option>
            <option value="discarded">Discarded reads</option>

            <validator type="no_options" message="Please select at least one output file." />
        </param>
    </inputs>

    <!-- Each dataset is created only when its key is ticked in the "outputs" parameter.
         format_source makes a dataset inherit the datatype of the named input, so that
         fastqsanger / fastqillumina inputs are not downgraded to generic fastq;
         format="fastq" remains the fallback. -->
    <outputs>
        <data name="assembled_reads" format="fastq" format_source="forward"
              from_work_dir="pear.assembled.fastq"
              label="${tool.name} on ${on_string}: Assembled reads">
            <filter>'assembled' in outputs</filter>
        </data>
        <data name="unassembled_forward_reads" format="fastq" format_source="forward"
              from_work_dir="pear.unassembled.forward.fastq"
              label="${tool.name} on ${on_string}: Unassembled forward reads">
            <filter>'forward' in outputs</filter>
        </data>
        <data name="unassembled_reverse_reads" format="fastq" format_source="reverse"
              from_work_dir="pear.unassembled.reverse.fastq"
              label="${tool.name} on ${on_string}: Unassembled reverse reads">
            <filter>'reverse' in outputs</filter>
        </data>
        <data name="discarded_reads" format="fastq" format_source="forward"
              from_work_dir="pear.discarded.fastq"
              label="${tool.name} on ${on_string}: Discarded reads">
            <filter>'discarded' in outputs</filter>
        </data>
    </outputs>

    <tests>
        <test>
            <param name="forward" value="forward.fastq" ftype="fastq" />
            <param name="reverse" value="reverse.fastq" ftype="fastq" />
            <param name="min_overlap" value="10" />
            <param name="min_assembly_length" value="50" />
            <param name="cap" value="0" />
            <!-- The previous command line passed the empirical-frequency flag on every run.
                 Ticking the checkbox reproduces that command, so the expected files below
                 (presumably generated with it) stay valid.
                 TODO(review): add a second test that covers the unchecked default. -->
            <param name="empirical_freqs" value="true" />
            <param name="outputs" value="assembled,forward" />
            <output name="assembled_reads" file="pear_assembled_results1.fastq" ftype="fastq"/>
            <output name="unassembled_forward_reads" file="pear_unassembled_forward_results1.fastq" ftype="fastq"/>
        </test>
    </tests>

    <help>
<![CDATA[

**What it does**

PEAR is an ultrafast, memory-efficient and highly accurate paired-end read merger.
It is fully parallelized and can run with as low as just a few kilobytes of memory.


**Parameters**

::

    Standard (mandatory):
      -f, --forward-fastq <str>        Forward paired-end FASTQ file.
      -r, --reverse-fastq <str>        Reverse paired-end FASTQ file.

    Optional:
      -p, --p-value <float>            Specify a p-value for the statistical test. If the computed
                                       p-value of a possible assembly exceeds the specified p-value
                                       then paired-end read will not be assembled. Valid options
                                       are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables
                                       the test. (default: 0.01)
      -v, --min-overlap <int>          Specify the minimum overlap size. The minimum overlap may be
                                       set to 1 when the statistical test is used. However, further
                                       restricting the minimum overlap size to a proper value may
                                       reduce false-positive assemblies. (default: 10)
      -m, --max-assembly-length <int>  Specify the maximum possible length of the assembled
                                       sequences. Setting this value to 0 disables the restriction
                                       and assembled sequences may be arbitrarily long. (default: 0)
      -n, --min-assembly-length <int>  Specify the minimum possible length of the assembled
                                       sequences. Setting this value to 0 disables the restriction
                                       and assembled sequences may be arbitrarily short. (default:
                                       50)
      -t, --min-trim-length <int>      Specify the minimum length of reads after trimming the low
                                       quality part (see option -q). (default: 1)
      -q, --quality-threshold <int>    Specify the quality score threshold for trimming the low
                                       quality part of a read. If the quality scores of two
                                       consecutive bases are strictly less than the specified
                                       threshold, the rest of the read will be trimmed. (default:
                                       0)
      -u, --max-uncalled-base <float>  Specify the maximal proportion of uncalled bases in a read.
                                       Setting this value to 0 will cause PEAR to discard all reads
                                       containing uncalled bases. The other extreme setting is 1
                                       which causes PEAR to process all reads independent of the
                                       number of uncalled bases. (default: 1)
      -g, --test-method <int>          Specify the type of statistical test. Two options are
                                       available. (default: 1)
                                       1: Given the minimum allowed overlap, test using the highest
                                       OES. Note that due to its discrete nature, this test usually
                                       yields a lower p-value for the assembled read than the cut-
                                       off (specified by -p). For example, setting the cut-off to
                                       0.05 using this test, the assembled reads might have an
                                       actual p-value of 0.02.

                                       2: Use the acceptance probability (m.a.p). This test method
                                       computes the same probability as test method 1. However, it
                                       assumes that the minimal overlap is the observed overlap
                                       with the highest OES, instead of the one specified by -v.
                                       Therefore, this is not a valid statistical test and the
                                       'p-value' is in fact the maximal probability for accepting
                                       the assembly. Nevertheless, we observed in practice that for
                                       the case the actual overlap sizes are relatively small, test
                                       2 can correctly assemble more reads with only slightly
                                       higher false-positive rate.
      -e, --empirical-freqs            Disable empirical base frequencies. (default: use empirical
                                       base frequencies)
      -s, --score-method <int>         Specify the scoring method. (default: 2)
                                       1: OES with +1 for match and -1 for mismatch.
                                       2: Assembly score (AS). Use +1 for match and -1 for mismatch
                                       multiplied by base quality scores.
                                       3: Ignore quality scores and use +1 for a match and -1 for a
                                       mismatch.
      -c, --cap <int>                  Specify the upper bound for the resulting quality score. If
                                       set to zero, capping is disabled. (default: 40)
      -z, --nbase                      When merging a base-pair that consists of two non-equal
                                       bases out of which none is degenerate, set the merged base
                                       to N and use the highest quality score of the two bases.

]]>
    </help>

    <citations>
        <citation type="doi">10.1093/bioinformatics/btt593</citation>
    </citations>
</tool>