Mercurial > repos > iuc > pear
comparison pear.xml @ 0:b0e3a2df7897 draft
Uploaded
| author | iuc |
|---|---|
| date | Wed, 11 Feb 2015 07:52:43 -0500 |
| parents | |
| children | 361d2cb38860 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b0e3a2df7897 |
|---|---|
| 1 <tool id="iuc_pear" name="Pear" version="0.9.6.0"> | |
| 2 <description>Paired-End read merger</description> | |
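| 3 <!--<version_command>bismark version</version_command>--> <!-- NOTE(review): this disabled placeholder names Bismark, not PEAR; supply PEAR's own version command before enabling it --> | |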
| 4 <requirements> | |
| 5 <requirement type="package" version="0.9.6">pear</requirement> | |
| 6 </requirements> | |
| 7 <stdio> <!-- Failure detection rules: exit codes 1 and above, exit codes -1 and below, and the text patterns "Error:" and "Exception:". No level or source attributes are set, so Galaxy's defaults apply. --> | |
| 8 <exit_code range="1:" /> | |
| 9 <exit_code range=":-1" /> | |
| 10 <regex match="Error:" /> | |
| 11 <regex match="Exception:" /> | |
| 12 </stdio> | |
| 13 <!-- Cheetah template for the PEAR command line. Results are written under the prefix "pear" so that the outputs section can collect the pear.*.fastq files from the working directory. The maximum length option is passed only when its value is above 0. The empirical_freqs and nbase booleans expand to their own flag (or to nothing), so they are emitted bare. NOTE(review): the long options "max-asm-length", "min-asm-length" and "quality-theshold" are spelled differently in the help section ("max-assembly-length", "min-assembly-length", "quality-threshold"); confirm against the PEAR 0.9.6 binary before changing either side. --> <command> | |
| 14 <![CDATA[ | |
| 15 pear | |
| 16 -f '$forward' | |
| 17 -r '$reverse' | |
| 18 --output pear | |
| 19 --p-value $pvalue | |
| 20 --min-overlap $min_overlap | |
| 21 #if int($max_assembly_length) > 0: | |
| 22 --max-asm-length $max_assembly_length | |
| 23 #end if | |
| 24 --min-asm-length $min_assembly_length | |
| 25 --min-trim-length $min_trim_length | |
| 26 --quality-theshold $quality_threshold | |
| 27 --max-uncalled-base $max_uncalled_base | |
| 28 --test-method $test_method | |
| 29 $empirical_freqs | |
| 30 -j "\${GALAXY_SLOTS:-8}" | |
| 31 --score-method $score_method | |
| 32 --cap $cap | |
| 33 $nbase | |
| 34 ]]> | |
| 35 </command> | |
| 36 <inputs> <!-- Tool form: two required FASTQ datasets followed by PEAR's tuning options and the choice of result files to keep. --> | |
| 37 <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq" | |
| 38 label="Specify the name of file that contains the forward paired-end reads" help="-f" /> | |
| 39 <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq" | |
| 40 label="Specify the name of file that contains the reverse paired-end reads" help="-r" /> | |
| 41 | |
| 42 <!-- optional tuning parameters; the numeric and select defaults below match the "default:" values listed in the help section --> | |
| 43 <param name="pvalue" type="float" value="0.01" min="0" optional="True" max="1" label="Specify a p-value for the statistical test" | |
| 44 help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Setting 1.0 disables the test. (--p-value)" /> | |
| 45 | |
| 46 <param name="min_overlap" type="integer" value="10" optional="True" label="Specify the minimum overlap size" | |
| 47 help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)" /> | |
| 48 | |
| 49 <param name="max_assembly_length" type="integer" value="0" optional="True" label="Specify the maximum possible length of the assembled sequences" | |
| 50 help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)" /> | |
| 51 | |
| 52 <param name="min_assembly_length" type="integer" value="50" optional="True" label="Specify the minimum possible length of the assembled sequences" | |
| 53 help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short. (--min-assembly-length)" /> | |
| 54 | |
| 55 <param name="min_trim_length" type="integer" value="1" optional="True" label="Specify the minimum length of reads after trimming the low quality part" | |
| 56 help="See option -q. (--min-trim-length)" /> | |
| 57 | |
| 58 <param name="quality_threshold" type="integer" value="0" optional="True" label="Specify the quality score threshold for trimming the low quality part of a read" | |
| 59 help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" /> | |
| 60 | |
| 61 <param name="max_uncalled_base" type="float" value="1.0" min="0" optional="True" max="1" label="Specify the maximal proportion of uncalled bases in a read" | |
| 62 help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases. (--max-uncalled-base)" /> | |
| 63 | |
| 64 <param name="cap" type="integer" value="40" optional="True" label="Specify the upper bound for the resulting quality score" | |
| 65 help="If set to zero, capping is disabled. (--cap)" /> | |
| 66 | |
| 67 <param name="test_method" type="select" label="Specify the type of statistical test" help="(--test-method)"> | |
| 68 <option value="1" selected="True">Given the minimum allowed overlap, test using the highest OES (1)</option> | |
| 69 <option value="2">Use the acceptance probability (2)</option> | |
| 70 </param> | |
| 71 | |
| 72 <param name="empirical_freqs" type="boolean" truevalue="-e" falsevalue="" checked="false" | |
| 73 label="Disable empirical base frequencies" help="(--empirical-freqs)" /> <!-- when checked this expands to the short flag "-e"; when unchecked it expands to an empty string --> | |
| 74 <param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false" | |
| 75 label="Use N base if uncertain" help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)" /> | |
| 76 | |
| 77 <param name="score_method" type="select" label="Specify the scoring method" help="(--score-method)"> | |
| 78 <option value="1">OES with +1 for match and -1 for mismatch</option> | |
| 79 <option value="2" selected="True">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option> | |
| 80 <option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option> | |
| 81 </param> | |
| 82 | |
| 83 <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output files"> <!-- the option values chosen here are the keys tested by the filter expressions in the outputs section --> | |
| 84 <option value="assembled" selected="True">Assembled reads</option> | |
| 85 <option value="forward">Forward unassembled reads</option> | |
| 86 <option value="reverse">Reverse unassembled reads</option> | |
| 87 <option value="discarded">Discarded reads</option> | |
| 88 | |
| 89 <validator type="no_options" message="Please select at least one output file." /> | |
| 90 </param> | |
| 91 </inputs> | |
| 92 <outputs> <!-- One FASTQ dataset per PEAR result file, collected from the working directory by its "pear." prefixed name. Each dataset is created only when its key is among the values selected in the "outputs" checkbox parameter. --> | |
| 93 <data format="fastq" name="assembled_reads" from_work_dir="pear.assembled.fastq" label="${tool.name} on ${on_string}: Assembled reads"> | |
| 94 <filter>'assembled' in outputs</filter> | |
| 95 </data> | |
| 96 <data format="fastq" name="unassembled_forward_reads" from_work_dir="pear.unassembled.forward.fastq" label="${tool.name} on ${on_string}: Unassembled forward reads"> | |
| 97 <filter>'forward' in outputs</filter> | |
| 98 </data> | |
| 99 <data format="fastq" name="unassembled_reverse_reads" from_work_dir="pear.unassembled.reverse.fastq" label="${tool.name} on ${on_string}: Unassembled reverse reads"> | |
| 100 <filter>'reverse' in outputs</filter> | |
| 101 </data> | |
| 102 <data format="fastq" name="discarded_reads" from_work_dir="pear.discarded.fastq" label="${tool.name} on ${on_string}: Discarded reads"> | |
| 103 <filter>'discarded' in outputs</filter> | |
| 104 </data> | |
| 105 </outputs> | |
| 106 <tests> <!-- Single test case: runs forward.fastq and reverse.fastq with cap set to 0, selects the assembled and forward outputs, and compares those two datasets against the stored result files. --> | |
| 107 <test> | |
| 108 <param name="forward" value="forward.fastq" ftype="fastq" /> | |
| 109 <param name="reverse" value="reverse.fastq" ftype="fastq" /> | |
| 110 <param name="min_overlap" value="10" /> | |
| 111 <param name="min_assembly_length" value="50" /> | |
| 112 <param name="cap" value="0" /> | |
| 113 <param name="outputs" value="assembled,forward" /> | |
| 114 <output name="assembled_reads" file="pear_assembled_results1.fastq" ftype="fastq"/> | |
| 115 <output name="unassembled_forward_reads" file="pear_unassembled_forward_results1.fastq" ftype="fastq"/> | |
| 116 </test> | |
| 117 </tests> | |
| 118 <help> | |
| 119 <![CDATA[ | |
| 120 | |
| 121 **What it does** | |
| 122 | |
| 123 PEAR is an ultrafast, memory-efficient and highly accurate paired-end read merger. | |
| 124 It is fully parallelized and can run with as low as just a few kilobytes of memory. | |
| 125 | |
| 126 | |
| 127 **Parameters** | |
| 128 | |
| 129 | |
| 130 Standard (mandatory): | |
| 131 -f, --forward-fastq <str> Forward paired-end FASTQ file. | |
| 132 -r, --reverse-fastq <str> Reverse paired-end FASTQ file. | |
| 133 | |
| 134 Optional: | |
| 135 -p, --p-value <float> Specify a p-value for the statistical test. If the computed | |
| 136 p-value of a possible assembly exceeds the specified p-value | |
| 137 then the paired-end read will not be assembled. Valid options | |
| 138 are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables | |
| 139 the test. (default: 0.01) | |
| 140 -v, --min-overlap <int> Specify the minimum overlap size. The minimum overlap may be | |
| 141 set to 1 when the statistical test is used. However, further | |
| 142 restricting the minimum overlap size to a proper value may | |
| 143 reduce false-positive assemblies. (default: 10) | |
| 144 -m, --max-assembly-length <int> Specify the maximum possible length of the assembled | |
| 145 sequences. Setting this value to 0 disables the restriction | |
| 146 and assembled sequences may be arbitrarily long. (default: 0) | |
| 147 -n, --min-assembly-length <int> Specify the minimum possible length of the assembled | |
| 148 sequences. Setting this value to 0 disables the restriction | |
| 149 and assembled sequences may be arbitrarily short. (default: | |
| 150 50) | |
| 151 -t, --min-trim-length <int> Specify the minimum length of reads after trimming the low | |
| 152 quality part (see option -q). (default: 1) | |
| 153 -q, --quality-threshold <int> Specify the quality score threshold for trimming the low | |
| 154 quality part of a read. If the quality scores of two | |
| 155 consecutive bases are strictly less than the specified | |
| 156 threshold, the rest of the read will be trimmed. (default: | |
| 157 0) | |
| 158 -u, --max-uncalled-base <float> Specify the maximal proportion of uncalled bases in a read. | |
| 159 Setting this value to 0 will cause PEAR to discard all reads | |
| 160 containing uncalled bases. The other extreme setting is 1 | |
| 161 which causes PEAR to process all reads independent of the | |
| 162 number of uncalled bases. (default: 1) | |
| 163 -g, --test-method <int> Specify the type of statistical test. Two options are | |
| 164 available. (default: 1) | |
| 165 1: Given the minimum allowed overlap, test using the highest | |
| 166 OES. Note that due to its discrete nature, this test usually | |
| 167 yields a lower p-value for the assembled read than the cut- | |
| 168 off (specified by -p). For example, setting the cut-off to | |
| 169 0.05 using this test, the assembled reads might have an | |
| 170 actual p-value of 0.02. | |
| 171 | |
| 172 2: Use the acceptance probability (m.a.p). This test method | |
| 173 computes the same probability as test method 1. However, it | |
| 174 assumes that the minimal overlap is the observed overlap | |
| 175 with the highest OES, instead of the one specified by -v. | |
| 176 Therefore, this is not a valid statistical test and the | |
| 177 'p-value' is in fact the maximal probability for accepting | |
| 178 the assembly. Nevertheless, we observed in practice that for | |
| 179 the case the actual overlap sizes are relatively small, test | |
| 180 2 can correctly assemble more reads with only slightly | |
| 181 higher false-positive rate. | |
| 182 -e, --empirical-freqs Disable empirical base frequencies. (default: use empirical | |
| 183 base frequencies) | |
| 184 -s, --score-method <int> Specify the scoring method. (default: 2) | |
| 185 1: OES with +1 for match and -1 for mismatch. | |
| 186 2: Assembly score (AS). Use +1 for match and -1 for mismatch | |
| 187 multiplied by base quality scores. | |
| 188 3: Ignore quality scores and use +1 for a match and -1 for a | |
| 189 mismatch. | |
| 190 -c, --cap <int> Specify the upper bound for the resulting quality score. If | |
| 191 set to zero, capping is disabled. (default: 40) | |
| 192 -z, --nbase When merging a base-pair that consists of two non-equal | |
| 193 bases out of which none is degenerate, set the merged base | |
| 194 to N and use the highest quality score of the two bases | |
| 195 | |
| 196 | |
| 197 | |
| 198 | |
| 199 ]]> | |
| 200 </help> | |
| 201 <citations> | |
| 202 <citation type="doi">10.1093/bioinformatics/btt593</citation> | |
| 203 </citations> | |
| 204 </tool> |
