Mercurial > repos > iuc > pear
diff pear.xml @ 0:b0e3a2df7897 draft
Uploaded
| author | iuc |
|---|---|
| date | Wed, 11 Feb 2015 07:52:43 -0500 |
| parents | |
| children | 361d2cb38860 |
line wrap: on
line diff
<!-- pear.xml: Galaxy wrapper for PEAR, the paired-end read merger (new file, committed Wed, 11 Feb 2015 07:52:43 -0500). -->
<tool id="iuc_pear" name="Pear" version="0.9.6.0">
    <description>Paired-End read merger</description>
    <!-- TODO: add a version_command for pear; the previously commented-out one invoked "bismark version", i.e. a different tool. -->
    <requirements>
        <requirement type="package" version="0.9.6">pear</requirement>
    </requirements>
    <!-- Any non-zero exit code, or "Error:" / "Exception:" appearing in the tool output, marks the job as failed. -->
    <stdio>
        <exit_code range="1:" />
        <exit_code range=":-1" />
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
    <!--
        Command template (Cheetah). All results are written with the prefix "pear" into the
        job working directory and collected from there by the outputs section.
    -->
    <command>
<![CDATA[
        pear
        -f "$forward"
        -r "$reverse"
        --output pear

        ## The numeric parameters are declared optional, so a user may blank them.
        ## In that case the option is omitted and PEAR applies its own default.
        #if str($pvalue) != '':
            --p-value $pvalue
        #end if
        #if str($min_overlap) != '':
            --min-overlap $min_overlap
        #end if

        ## Short options are used for -m, -n and -q: the long spellings used here
        ## before (max-asm-length, min-asm-length, quality-theshold) do not match
        ## the names listed in the help section, while the short forms are unambiguous.
        ## A maximum assembly length of 0 means "no restriction", so it is not passed.
        #if str($max_assembly_length) != '' and int(str($max_assembly_length)) > 0:
            -m $max_assembly_length
        #end if
        #if str($min_assembly_length) != '':
            -n $min_assembly_length
        #end if
        #if str($min_trim_length) != '':
            --min-trim-length $min_trim_length
        #end if
        #if str($quality_threshold) != '':
            -q $quality_threshold
        #end if
        #if str($max_uncalled_base) != '':
            --max-uncalled-base $max_uncalled_base
        #end if

        --test-method $test_method

        ## The boolean itself expands to "-e" or to nothing. Writing the long flag in
        ## front of it passed the flag on every run, whatever the checkbox state.
        $empirical_freqs

        -j "\${GALAXY_SLOTS:-8}"
        --score-method $score_method
        #if str($cap) != '':
            --cap $cap
        #end if
        $nbase
]]>
    </command>
    <inputs>
        <param name="forward" type="data" format="fastqillumina, fastqsanger, fastq"
            label="Specify the name of file that contains the forward paired-end reads" help="-f" />
        <param name="reverse" type="data" format="fastqillumina, fastqsanger, fastq"
            label="Specify the name of file that contains the reverse paired-end reads" help="-r" />

        <!-- optional -->
        <param name="pvalue" type="float" value="0.01" min="0" optional="True" max="1"
            label="Specify a p-value for the statistical test"
            help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Setting 1.0 disables the test. (--p-value)" />

        <param name="min_overlap" type="integer" value="10" optional="True"
            label="Specify the minimum overlap size"
            help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)" />

        <param name="max_assembly_length" type="integer" value="0" optional="True"
            label="Specify the maximum possible length of the assembled sequences"
            help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)" />

        <param name="min_assembly_length" type="integer" value="50" optional="True"
            label="Specify the minimum possible length of the assembled sequences"
            help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short. (--min-assembly-length)" />

        <param name="min_trim_length" type="integer" value="1" optional="True"
            label="Specify the minimum length of reads after trimming the low quality part"
            help="See option -q. (--min-trim-length)" />

        <param name="quality_threshold" type="integer" value="0" optional="True"
            label="Specify the quality score threshold for trimming the low quality part of a read"
            help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" />

        <param name="max_uncalled_base" type="float" value="1.0" min="0" optional="True" max="1"
            label="Specify the maximal proportion of uncalled bases in a read"
            help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases. (--max-uncalled-base)" />

        <param name="cap" type="integer" value="40" optional="True"
            label="Specify the upper bound for the resulting quality score"
            help="If set to zero, capping is disabled. (--cap)" />

        <param name="test_method" type="select" label="Specify the type of statistical test" help="(--test-method)">
            <option value="1" selected="True">Given the minimum allowed overlap, test using the highest OES (1)</option>
            <option value="2">Use the acceptance probability (2)</option>
        </param>

        <!-- Both booleans expand to the bare command-line flag when checked and to nothing otherwise. -->
        <param name="empirical_freqs" type="boolean" truevalue="-e" falsevalue="" checked="false"
            label="Disable empirical base frequencies" help="(--empirical-freqs)" />
        <param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false"
            label="Use N base if uncertain"
            help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)" />

        <param name="score_method" type="select" label="Specify the scoring method" help="(--score-method)">
            <option value="1">OES with +1 for match and -1 for mismatch</option>
            <option value="2" selected="True">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option>
            <option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option>
        </param>

        <!-- Selects which of the files written by PEAR become datasets; see the filters in the outputs section. -->
        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output files">
            <option value="assembled" selected="True">Assembled reads</option>
            <option value="forward">Forward unassembled reads</option>
            <option value="reverse">Reverse unassembled reads</option>
            <option value="discarded">Discarded reads</option>

            <validator type="no_options" message="Please select at least one output file." />
        </param>
    </inputs>
    <!-- Each dataset is picked up from the working directory and only created when its value is ticked in "outputs". -->
    <outputs>
        <data format="fastq" name="assembled_reads" from_work_dir="pear.assembled.fastq"
            label="${tool.name} on ${on_string}: Assembled reads">
            <filter>'assembled' in outputs</filter>
        </data>
        <data format="fastq" name="unassembled_forward_reads" from_work_dir="pear.unassembled.forward.fastq"
            label="${tool.name} on ${on_string}: Unassembled forward reads">
            <filter>'forward' in outputs</filter>
        </data>
        <data format="fastq" name="unassembled_reverse_reads" from_work_dir="pear.unassembled.reverse.fastq"
            label="${tool.name} on ${on_string}: Unassembled reverse reads">
            <filter>'reverse' in outputs</filter>
        </data>
        <data format="fastq" name="discarded_reads" from_work_dir="pear.discarded.fastq"
            label="${tool.name} on ${on_string}: Discarded reads">
            <filter>'discarded' in outputs</filter>
        </data>
    </outputs>
    <!--
        NOTE(review): the expected files may have been produced while the empirical-freqs flag
        was passed on every run (see the command section); regenerate them if this test starts failing.
    -->
    <tests>
        <test>
            <param name="forward" value="forward.fastq" ftype="fastq" />
            <param name="reverse" value="reverse.fastq" ftype="fastq" />
            <param name="min_overlap" value="10" />
            <param name="min_assembly_length" value="50" />
            <param name="cap" value="0" />
            <param name="outputs" value="assembled,forward" />
            <output name="assembled_reads" file="pear_assembled_results1.fastq" ftype="fastq"/>
            <output name="unassembled_forward_reads" file="pear_unassembled_forward_results1.fastq" ftype="fastq"/>
        </test>
    </tests>
    <help>
<![CDATA[

**What it does**

PEAR is an ultrafast, memory-efficient and highly accurate pair-end read merger.
It is fully parallelized and can run with as low as just a few kilobytes of memory.


**Parameters**

::

    Standard (mandatory):
      -f, --forward-fastq <str>        Forward paired-end FASTQ file.
      -r, --reverse-fastq <str>        Reverse paired-end FASTQ file.

    Optional:
      -p, --p-value <float>            Specify a p-value for the statistical test. If the computed
                                       p-value of a possible assembly exceeds the specified p-value
                                       then paired-end read will not be assembled. Valid options
                                       are: 0.0001, 0.001, 0.01, 0.05 and 1.0. Setting 1.0 disables
                                       the test. (default: 0.01)
      -v, --min-overlap <int>          Specify the minimum overlap size. The minimum overlap may be
                                       set to 1 when the statistical test is used. However, further
                                       restricting the minimum overlap size to a proper value may
                                       reduce false-positive assembles. (default: 10)
      -m, --max-assembly-length <int>  Specify the maximum possible length of the assembled
                                       sequences. Setting this value to 0 disables the restriction
                                       and assembled sequences may be arbitrary long. (default: 0)
      -n, --min-assembly-length <int>  Specify the minimum possible length of the assembled
                                       sequences. Setting this value to 0 disables the restriction
                                       and assembled sequences may be arbitrary short. (default:
                                       50)
      -t, --min-trim-length <int>      Specify the minimum length of reads after trimming the low
                                       quality part (see option -q). (default: 1)
      -q, --quality-threshold <int>    Specify the quality score threshold for trimming the low
                                       quality part of a read. If the quality scores of two
                                       consecutive bases are strictly less than the specified
                                       threshold, the rest of the read will be trimmed. (default:
                                       0)
      -u, --max-uncalled-base <float>  Specify the maximal proportion of uncalled bases in a read.
                                       Setting this value to 0 will cause PEAR to discard all reads
                                       containing uncalled bases. The other extreme setting is 1
                                       which causes PEAR to process all reads independent on the
                                       number of uncalled bases. (default: 1)
      -g, --test-method <int>          Specify the type of statistical test. Two options are
                                       available. (default: 1)
                                       1: Given the minimum allowed overlap, test using the highest
                                       OES. Note that due to its discrete nature, this test usually
                                       yields a lower p-value for the assembled read than the cut-
                                       off (specified by -p). For example, setting the cut-off to
                                       0.05 using this test, the assembled reads might have an
                                       actual p-value of 0.02.

                                       2. Use the acceptance probability (m.a.p). This test methods
                                       computes the same probability as test method 1. However, it
                                       assumes that the minimal overlap is the observed overlap
                                       with the highest OES, instead of the one specified by -v.
                                       Therefore, this is not a valid statistical test and the
                                       'p-value' is in fact the maximal probability for accepting
                                       the assembly. Nevertheless, we observed in practice that for
                                       the case the actual overlap sizes are relatively small, test
                                       2 can correctly assemble more reads with only slightly
                                       higher false-positive rate.
      -e, --empirical-freqs            Disable empirical base frequencies. (default: use empirical
                                       base frequencies)
      -s, --score-method <int>         Specify the scoring method. (default: 2)
                                       1. OES with +1 for match and -1 for mismatch.
                                       2: Assembly score (AS). Use +1 for match and -1 for mismatch
                                       multiplied by base quality scores.
                                       3: Ignore quality scores and use +1 for a match and -1 for a
                                       mismatch.
      -c, --cap <int>                  Specify the upper bound for the resulting quality score. If
                                       set to zero, capping is disabled. (default: 40)
      -z, --nbase                      When merging a base-pair that consists of two non-equal
                                       bases out of which none is degenerate, set the merged base
                                       to N and use the highest quality score of the two bases

]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btt593</citation>
    </citations>
</tool>
