Mercurial > repos > bebatut > prinseq
changeset 4:acaf871de2ea draft default tip
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/prinseq/ commit ade7dfd3c1f1c5369e338013c5ca2ec7115cafe6-dirty
author | bebatut |
---|---|
date | Wed, 27 Jan 2016 03:55:09 -0500 |
parents | 10ac5c567b73 |
children | |
files | README.md prinseq.xml tool_dependencies.xml |
diffstat | 3 files changed, 274 insertions(+), 157 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Tue Nov 17 06:02:09 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -Galaxy wrapper for PRINSEQ -=========================== - -PRINSEQ is a tool for easy and rapid quality control and data processing of -metagenomic and metatranscriptomic datasets. This tool allow to process the -sequences with filtering and trimming. For more information, check the -[user manual](http://prinseq.sourceforge.net/manual.html) - -# Installation - -## Automated installation - -On a Galaxy instance, the wrapper can be automatically installed using the -ToolShed. This will automatically install the dependencies, configure the Galaxy -instance for the tool and data, ... - -## Manual installation - -For manual installation, the files `prinseq.xml` must be put in the `tools/prinseq/` - folder and add the XML files to Galaxy's `tool_conf.xml` (in `config` folder) as -normal: - -``` -<section name="Control quality" id="prinseq"> - <tool file="prinseq/prinseq.xml" /> -</section> -``` - -PRINSEQ must be installed somewhere on the system path. It can be done using: - -``` -planemo dependency_script ~/repositories/galaxytools/tools/prinseq/ -bash dep_install.sh -source env.sh -``` - -To test the Galaxy integration, the functional tests can be runned: - -``` -./run_tests.sh -sid prinseq -``` - -# Bug Reports - -Any bug can be filed in an issue [here](https://github.com/ASaiM/galaxytools/issues). - -# Developers - -A release can be pushed to the test or main "Galaxy Tool Shed", using the following -Planemo commands (with required Tool Shed access detailed in `~/.planemo.yml`): - -``` -planemo shed_update -t testtoolshed --check_diff ~/repositories/galaxytools/tools/prinseq/ -``` - -or: - -``` -planemo shed_update -t toolshed --check_diff ~/repositories/galaxytools/tools/prinseq/ -``` - -# License (Apache 2) - -This wrapper are released under Apache 2 License. 
See the [LICENSE file](https://github.com/ASaiM/galaxytools/blob/master/LICENSE) for details \ No newline at end of file
--- a/prinseq.xml Tue Nov 17 06:02:09 2015 -0500 +++ b/prinseq.xml Wed Jan 27 03:55:09 2016 -0500 @@ -25,8 +25,24 @@ <command> <![CDATA[ - perl \${PRINSEQ_DIR}/prinseq-lite.pl - -fastq $sequence_to_control_file + perl \${PRINSEQ_DIR}/prinseq-lite.pl + #if $seq_type.seq_type_opt == 'single': + -fastq "$seq_type.input_singles" + #if $seq_type.input_singles.ext == 'fastqillumina': + -phred64 + #end if + #else: + -fastq "$seq_type.input_mate1" + -fastq2 "$seq_type.input_mate2" + #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext: + #import sys + #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' ) + #end if + #if $seq_type.input_mate1.ext == 'fastqillumina': + -phred64 + #end if + #end if + -out_good good_sequences -out_bad rejected_sequences @@ -37,6 +53,7 @@ #if $min_length_filter_treatments.apply_min_length_filter_treatments : -min_len $min_length_filter_treatments.min_length_filter_treatment_value #end if + #set max_length_filter_treatments=$length_filter_treatments.max_length_filter_treatments #if $max_length_filter_treatments.apply_max_length_filter_treatments : -max_len $max_length_filter_treatments.max_length_filter_treatment_value @@ -49,16 +66,19 @@ #if $min_quality_filter_treatments.apply_min_quality_filter_treatments : -min_qual_score $min_quality_filter_treatments.min_quality_filter_treatment_value #end if + #set max_quality_filter_treatments=$quality_filter_treatments.max_quality_filter_treatments #if $max_quality_filter_treatments.apply_max_quality_filter_treatments : -max_qual_score $max_quality_filter_treatments.max_quality_filter_treatment_value #end if + #set mean_quality_filter_treatments=$quality_filter_treatments.mean_quality_filter_treatments #if $mean_quality_filter_treatments.apply_mean_quality_filter_treatments: #set min_mean_quality_filter_treatments=$mean_quality_filter_treatments.min_mean_quality_filter_treatments #if 
$min_mean_quality_filter_treatments.apply_min_mean_quality_filter_treatments: -min_qual_mean $min_mean_quality_filter_treatments.min_mean_quality_filter_treatment_value #end if + #set max_mean_quality_filter_treatments=$mean_quality_filter_treatments.max_mean_quality_filter_treatments #if $max_mean_quality_filter_treatments.apply_max_mean_quality_filter_treatments: -max_qual_mean $max_mean_quality_filter_treatments.max_mean_quality_filter_treatment_value @@ -74,22 +94,24 @@ #if $min_GC_perc_content_filter_treatments.apply_min_GC_perc_content_filter_treatments : -min_gc $min_GC_perc_content_filter_treatments.min_GC_perc_content_filter_treatment_value #end if - set max_GC_perc_content_filter_treatments=$GC_perc_content_filter_treatments.max_GC_perc_content_filter_treatments + + #set max_GC_perc_content_filter_treatments=$GC_perc_content_filter_treatments.max_GC_perc_content_filter_treatments #if $max_GC_perc_content_filter_treatments.apply_max_GC_perc_content_filter_treatments : -max_gc $max_GC_perc_content_filter_treatments.max_GC_perc_content_filter_treatment_value #end if #end if + #set N_number_content_filter_treatments=$base_content_filter_treatments.N_number_content_filter_treatments #if $N_number_content_filter_treatments.apply_N_number_content_filter_treatments : -ns_max_n $N_number_content_filter_treatments.N_number_content_filter_treatment_value #end if + #set N_percentage_content_filter_treatments=$base_content_filter_treatments.N_percentage_content_filter_treatments #if $N_percentage_content_filter_treatments.apply_N_percentage_content_filter_treatments : -ns_max_p $N_percentage_content_filter_treatments.N_percentage_content_filter_treatment_value #end if - #if $base_content_filter_treatments.apply_other_base_content_filter_treatments : - -noniupac - #end if + + $base_content_filter_treatments.apply_other_base_content_filter_treatments #end if #set complexity_filter_treatments=$filter_treatments.complexity_filter_treatments @@ -114,17 +136,20 @@ #if 
$left_position_trimming_treatments.apply_left_position_trimming_treatments : -trim_left $left_position_trimming_treatments.left_position_trimming_treatment_value #end if + #set right_position_trimming_treatments=$nb_position_trimming_treatments.right_position_trimming_treatments #if $right_position_trimming_treatments.apply_right_position_trimming_treatments : -trim_right $right_position_trimming_treatments.right_position_trimming_treatment_value #end if #end if + #set percentage_position_trimming_treatments=$position_trimming_treatments.percentage_position_trimming_treatments #if $percentage_position_trimming_treatments.apply_percentage_position_trimming_treatments : #set left_percentage_position_trimming_treatments=$percentage_position_trimming_treatments.left_percentage_position_trimming_treatments #if $left_percentage_position_trimming_treatments.apply_left_percentage_position_trimming_treatments : -trim_left_p $left_percentage_position_trimming_treatments.left_percentage_position_trimming_treatment_value #end if + #set right_percentage_position_trimming_treatments=$percentage_position_trimming_treatments.right_percentage_position_trimming_treatments #if $right_percentage_position_trimming_treatments.apply_right_percentage_position_trimming_treatments : -trim_right_p $right_percentage_position_trimming_treatments.right_percentage_position_trimming_treatment_value @@ -140,17 +165,20 @@ #if $left_a_t_tail_trimming_treatments.apply_left_a_t_tail_trimming_treatments : -trim_tail_left $left_a_t_tail_trimming_treatments.left_a_t_tail_trimming_treatment_value #end if + #set right_a_t_tail_trimming_treatments=$a_t_tail_trimming_treatments.right_a_t_tail_trimming_treatments - #if right_a_t_tail_trimming_treatments.apply_right_a_t_tail_trimming_treatments : + #if $right_a_t_tail_trimming_treatments.apply_right_a_t_tail_trimming_treatments : -trim_tail_right $right_a_t_tail_trimming_treatments.right_a_t_tail_trimming_treatment_value #end if #end if + #set 
ns_tail_trimming_treatments=$tail_trimming_treatments.ns_tail_trimming_treatments #if $ns_tail_trimming_treatments.apply_ns_tail_trimming_treatments : #set left_ns_tail_trimming_treatments=$ns_tail_trimming_treatments.left_ns_tail_trimming_treatments #if $left_ns_tail_trimming_treatments.apply_left_ns_tail_trimming_treatments : -trim_ns_left $left_ns_tail_trimming_treatments.left_ns_tail_trimming_treatment_value #end if + #set right_ns_tail_trimming_treatments=$ns_tail_trimming_treatments.right_ns_tail_trimming_treatments #if $right_ns_tail_trimming_treatments.apply_right_ns_tail_trimming_treatments : -trim_ns_right $right_ns_tail_trimming_treatments.right_ns_tail_trimming_treatment_value @@ -164,96 +192,130 @@ #if $left_quality_trimming_treatments.apply_left_quality_trimming_treatments : -trim_qual_left $left_quality_trimming_treatments.left_quality_trimming_treatment_value #end if + #set right_quality_trimming_treatments=$quality_trimming_treatments.right_quality_trimming_treatments #if $right_quality_trimming_treatments.apply_right_quality_trimming_treatments : -trim_qual_right $right_quality_trimming_treatments.right_quality_trimming_treatment_value #end if + -trim_qual_type $quality_trimming_treatments.type_quality_trimming_treatments -trim_qual_rule $quality_trimming_treatments.rule_quality_trimming_treatments -trim_qual_window $quality_trimming_treatments.window_quality_trimming_treatments -trim_qual_step $quality_trimming_treatments.step_quality_trimming_treatments #end if - #end if -]]> + #end if + + -graph_stats "$graph_stats" + -graph_data stats.gd + ; + + # perl \${PRINSEQ_DIR}/prinseq-graphs-noPCA.pl -i stats.gd -html_all -o stats_html + + +]]> </command> <inputs> - <param name="sequence_to_control_file" type="data" format="fastq" label="Input - sequence file" help="The file must be in fastq format for quality - scores"/> + <conditional name="seq_type"> + <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?"> + <option 
value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq" label="FASTQ file" help="FASTQ files." /> + </when> + <when value="paired"> + <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq" label="FASTQ file" help="FASTQ files." /> + <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq" label="FASTQ file" help="FASTQ files." /> + </when> + </conditional> <conditional name="filter_treatments"> <param name='apply_filter_treatments' type='boolean' checked="true" - label="Apply filter treatments?" help=""/> + truevalue="true" falsevalue="false" label="Apply filter treatments?" + help=""/> <when value="true"> <conditional name="length_filter_treatments"> <param name='apply_length_filter_treatments' type='boolean' - checked="true" label="Filter sequence based on their - length?" help="By default, sequences smaller than 60 bp + checked="true" truevalue="true" falsevalue="false" + label="Filter sequence based on their length?" help="By + default, sequences smaller than 60 bp are removed. No top threshold is defined"/> <when value="true"> <conditional name="min_length_filter_treatments"> <param name='apply_min_length_filter_treatments' - type='boolean' checked="true" label="Filter too + type='boolean' checked="true" truevalue="true" + falsevalue="false" label="Filter too small sequences?" 
help="By default, sequences smaller than 60 bp are removed."/> <when value="true"> <param name="min_length_filter_treatment_value" type="integer" min="0" max="3000" value="60" label="Minimum length t - hreshold to conserve sequences" help=""/> + hreshold to conserve sequences" help="(-min_len)"/> </when> + <when value="false" /> </conditional> <conditional name="max_length_filter_treatments"> <param name='apply_max_length_filter_treatments' - type='boolean' checked="false" label="Filter too + type='boolean' truevalue="true" falsevalue="false" + checked="false" label="Filter too big sequences?" help="By default, no treatment based on a maximal length is made"/> <when value="true"> <param name="max_length_filter_treatment_value" type="integer" min="0" max="3000" value="1000" label="Maximal length - threshold to conserve sequences" help=""/> + threshold to conserve sequences" help="(-max_len)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="quality_filter_treatments"> <param name='apply_quality_filter_treatments' type='boolean' - checked="true" label="Filter sequences based on quality + checked="true" truevalue="true" falsevalue="false" + label="Filter sequences based on quality score?" help="By default, sequences with a mean score below 15 are removed."/> <when value="true"> <conditional name="min_quality_filter_treatments"> <param name='apply_min_quality_filter_treatments' - type='boolean' checked="false" label="Filter sequences + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences based on their minimum score?" 
help="By default, no treatment based on a minimum score is made"/> <when value="true"> <param name="min_quality_filter_treatment_value" type="integer" min="0" max="40" value="2" label="Minimum score threshold - to conserve sequences" help=""/> + to conserve sequences" help="(-min_qual_score)"/> </when> + <when value="false" /> </conditional> <conditional name="max_quality_filter_treatments"> <param name='apply_max_quality_filter_treatments' - type='boolean' checked="false" label="Filter sequences + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences based on their maximum score?" help="By default, no treatment based on a maximum score is made"/> <when value="true"> <param name="max_quality_filter_treatment_value" type="integer" min="0" max="40" value="38" label="Maximum score - threshold to conserve sequences" help=""/> + threshold to conserve sequences" help="(-max_qual_score)"/> </when> + <when value="false" /> </conditional> <conditional name="mean_quality_filter_treatments"> <param name='apply_mean_quality_filter_treatments' - type='boolean' checked="true" label="Filter sequences + type='boolean' checked="true" truevalue="true" + falsevalue="false" label="Filter sequences based on their mean score?" help="By default, sequences with a mean score below 15 are removed."/> <when value="true"> <conditional name="min_mean_quality_filter_treatments"> <param name='apply_min_mean_quality_filter_treatments' - type='boolean' checked="true" label="Filter + type='boolean' checked="true" truevalue="true" + falsevalue="false" label="Filter sequences with too small mean score?" 
help="By default, sequences with a mean score below 15 are removed."/> @@ -262,12 +324,14 @@ type="integer" min="0" max="40" value="15" label="Minimum mean score threshold to conserve sequences" - help=""/> + help="(-min_qual_mean)"/> </when> + <when value="false" /> </conditional> <conditional name="max_mean_quality_filter_treatments"> <param name='apply_max_mean_quality_filter_treatments' - type='boolean' checked="false" label="Filter + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences with too high mean score?" help="By default, no treatment based on a maximum mean score is made"/> @@ -276,29 +340,35 @@ type="integer" min="0" max="40" value="40" label="Maximum mean score threshold - to conserve sequences" help=""/> + to conserve sequences" help="(-max_qual_mean)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="base_content_filter_treatments"> <param name='apply_base_content_filter_treatments' - type='boolean' checked="true" label="Filter sequences - based on their base content?" help="By default, sequences + type='boolean' checked="true" truevalue="true" falsevalue="false" + label="Filter sequences based on their base content?" + help="By default, sequences with more than 2% of N bases are removed."/> <when value="true"> <conditional name="GC_perc_content_filter_treatments"> <param name='apply_GC_perc_content_filter_treatments' - type='boolean' checked="false" label="Filter + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences based on their GC percentage?" 
help="By default, no treatment based on GC percentage is made."/> <when value="true"> <conditional name="min_GC_perc_content_filter_treatments"> <param name='apply_min_GC_perc_content_filter_treatments' - type='boolean' checked="false" label="Filter + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences with too small GC percentage?" help="By default, no treatment based on GC percentage is made."/> @@ -307,12 +377,14 @@ type="integer" min="0" max="100" value="10" label="Minimal GC percentage threshold to conserve - sequences" help=""/> + sequences" help="(-min_gc)"/> </when> + <when value="false" /> </conditional> <conditional name="max_GC_perc_content_filter_treatments"> <param name='apply_max_GC_perc_content_filter_treatments' - type='boolean' checked="false" label="Filter + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences with too high GC percentage?" help="By default, no treatment based on GC percentage is made."/> @@ -321,27 +393,32 @@ type="integer" min="0" max="100" value="90" label="Maximal GC percentage threshold to conserve - sequences" help=""/> + sequences" help="(-max_gc)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="N_number_content_filter_treatments"> <param name='apply_N_number_content_filter_treatments' - type='boolean' checked="false" label="Filter - sequences based on their number of N bases?" + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Filter sequences based + on their number of N bases?" 
help="By default, no treatment based on N number is made."/> <when value="true"> <param name="N_number_content_filter_treatment_value" type="integer" min="0" max="3000" value="10" label="Maximal N number - threshold to conserve sequences" help=""/> + threshold to conserve sequences" help="(-ns_max_n)"/> </when> + <when value="false" /> </conditional> <conditional name="N_percentage_content_filter_treatments"> <param name='apply_N_percentage_content_filter_treatments' - type='boolean' checked="true" label="Filter sequences + type='boolean' checked="true" truevalue="true" + falsevalue="false" label="Filter sequences based on their percentage of N bases?" help="By default, sequences with more than 2% of N bases are removed."/> @@ -349,24 +426,28 @@ <param name="N_percentage_content_filter_treatment_value" type="integer" min="0" max="100" value="2" label="Maximal N percentage threshold - to conserve sequences" help=""/> + to conserve sequences" help="(-ns_max_p)"/> </when> + <when value="false" /> </conditional> <param name='apply_other_base_content_filter_treatments' - type='boolean' checked="false" label="Filter sequences + type='boolean' truevalue="-noniupac" falsevalue="" + checked="false" label="Filter sequences with characters other than A, T, C, G and N?" help="By - default, this treatment is not made."/> + default, this treatment is not made. (-noniupac)"/> </when> + <when value="false" /> </conditional> <conditional name="complexity_filter_treatments"> <param name='apply_complexity_filter_treatments' type='boolean' - checked="false" label="Filter sequences based on their + checked="false" truevalue="true" falsevalue="false" + label="Filter sequences based on their complexity?" 
help="By default, no complexity filter is applied"/> <when value="true"> <param name="method_complexity_filter_treatments" type="select" display="radio" label="Method to filter low complexity - sequences" help=""> + sequences" help="(-lc_method)"> <option value="dust">Dust</option> <option value="entropy" >Entropy</option> </param> @@ -374,29 +455,35 @@ min="0" max="100" value="2" label="Threshold value used to filter sequences by sequence complexity" help="The dust method uses the threshold as maximum allowed score - and the entropy method as minimum allowed value."/> + and the entropy method as minimum allowed value.(-lc_threshold)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="trimming_treatments"> <param name='apply_trimming_treatments' type='boolean' checked="true" - label="Apply trimming treatments?" help=""/> + truevalue="true" falsevalue="false" label="Apply trimming + treatments?" help=""/> <when value="true"> <conditional name="length_trimming_treatments"> <param name='apply_length_trimming_treatments' type='boolean' - checked="false" label="Trim all sequences from the 3'-end + checked="false" truevalue="true" falsevalue="false" + label="Trim all sequences from the 3'-end to a length?" help="By default, no length trimming is made"/> <when value="true"> <param name="length_trimming_treatment_value" type="integer" min="0" max="3000" value="100" label="Length of sequences after - trimming" help=""/> + trimming" help="(-trim_to_len)"/> </when> + <when value="false" /> </conditional> <conditional name="position_trimming_treatments"> <param name='apply_position_trimming_treatments' type='boolean' - checked="false" label="Trim all sequences from the ends?" + checked="false" truevalue="true" falsevalue="false" + label="Trim all sequences from the ends?" 
help="By default, no position trimming is made"/> <when value="true"> <conditional name="nb_position_trimming_treatments"> @@ -407,7 +494,8 @@ <when value="true"> <conditional name="left_position_trimming_treatments"> <param name='apply_left_position_trimming_treatments' - type='boolean' checked="false" label="Trim + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim sequences at the 5'-end by a defined number of positions?" help="By default, no position trimming is made"/> @@ -415,12 +503,14 @@ <param name="left_position_trimming_treatment_value" type="integer" min="0" max="3000" value="100" label="Number of - positions to trim on 5'-end" help=""/> + positions to trim on 5'-end" help="(-trim_left)"/> </when> + <when value="false" /> </conditional> <conditional name="right_position_trimming_treatments"> <param name='apply_right_position_trimming_treatments' - type='boolean' checked="false" label="Trim + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim sequences at the 3'-end by a defined number of positions?" help="By default, no position trimming is made"/> @@ -429,22 +519,26 @@ type="integer" min="0" max="3000" value="100" label="Number of positions to trim on 3'-end" - help=""/> + help="(-trim_right)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="percentage_position_trimming_treatments"> <param name='apply_percentage_position_trimming_treatments' - type='boolean' checked="false" label="Trim sequences + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim sequences by a defined percentage of read length?" help="By default, no position trimming is made. 
The trim length is rounded towards the lower integer"/> <when value="true"> <conditional name="left_percentage_position_trimming_treatments"> <param name='apply_left_percentage_position_trimming_treatments' - type='boolean' checked="false" label="Trim - sequences at the 5'-end by a defined percentage + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim sequences + at the 5'-end by a defined percentage of read length?" help="By default, no position trimming is made. The trim length is rounded towards the lower integer"/> @@ -452,12 +546,14 @@ <param name="left_percentage_position_trimming_treatment_value" type="integer" min="0" max="100" value="2" label="Percentage - of positions to trim on 5'-end" help=""/> + of positions to trim on 5'-end" help="(-trim_left_p)"/> </when> + <when value="false" /> </conditional> <conditional name="right_percentage_position_trimming_treatments"> <param name='apply_right_percentage_position_trimming_treatments' - type='boolean' checked="false" label="Trim + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim sequences at the 3'-end by a defined percentage of read length?" help="By default, no position trimming is made. The trim length is rounded @@ -466,27 +562,33 @@ <param name="right_percentage_position_trimming_treatment_value" type="integer" min="0" max="100" value="2" label="Percentage - of positions to trim on 3'-end" help=""/> + of positions to trim on 3'-end" help="(-trim_right_p)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="tail_trimming_treatments"> <param name='apply_tail_trimming_treatments' type='boolean' - checked="false" label="Trim tails?" help="By default, no + checked="false" truevalue="true" falsevalue="false" + label="Trim tails?" 
help="By default, no tail trimming is made"/> <when value="true"> <conditional name="a_t_tail_trimming_treatments"> <param name='apply_a_t_tail_trimming_treatments' - type='boolean' checked="false" label="Trim poly-A/T + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim poly-A/T tail?" help="By default, no poly-A/T tail trimming is made"/> <when value="true"> <conditional name="left_a_t_tail_trimming_treatments"> <param name='apply_left_a_t_tail_trimming_treatments' - type='boolean' checked="false" label="Trim + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim poly-A/T tail at the 5'-end?" help="By default, no 5'-end poly-A/T tail trimming is made"/> @@ -495,12 +597,14 @@ type="integer" min="0" max="3000" value="100" label="Minimum length of poly-A/T to trim at the 5'-end" - help=""/> + help="(-trim_tail_left)"/> </when> + <when value="false" /> </conditional> <conditional name="right_a_t_tail_trimming_treatments"> <param name='apply_right_a_t_tail_trimming_treatments' - type='boolean' checked="false" label="Trim poly-A/T + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim poly-A/T tail at the 5'-end?" help="By default, no 3'-end poly-A/T tail trimming is made"/> <when value="true"> @@ -508,19 +612,23 @@ type="integer" min="0" max="3000" value="100" label="Minimum length of poly-A/T to trim at the 5'-end" - help=""/> + help="(-trim_tail_right)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="ns_tail_trimming_treatments"> <param name='apply_ns_tail_trimming_treatments' type='boolean' - checked="false" label="Trim poly-N tail?" help="By + checked="false" truevalue="true" falsevalue="false" + label="Trim poly-N tail?" 
help="By default, no poly-N tail trimming is made"/> <when value="true"> <conditional name="left_ns_tail_trimming_treatments"> <param name='apply_left_ns_tail_trimming_treatments' - type='boolean' checked="false" label="Trim + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim poly-N tail at the 5'-end?" help="By default, no 5'-end poly-N tail trimming is made"/> <when value="true"> @@ -528,12 +636,14 @@ type="integer" min="0" max="3000" value="100" label="Minimum length of poly-N to trim at the 5'-end" - help=""/> + help="(-trim_ns_left)"/> </when> + <when value="false" /> </conditional> <conditional name="right_ns_tail_trimming_treatments"> <param name='apply_right_ns_tail_trimming_treatments' - type='boolean' checked="false" label="Trim + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim poly-N tail at the 5'-end?" help="By default, no 3'-end poly-N tail trimming is made"/> <when value="true"> @@ -541,46 +651,54 @@ type="integer" min="0" max="3000" value="100" label="Minimum length of poly-N to trim at the 5'-end" - help=""/> + help="(-trim_ns_right)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> <conditional name="quality_trimming_treatments"> <param name='apply_quality_trimming_treatments' type='boolean' - checked="true" label="Trim sequence by quality score?" + checked="true" truevalue="true" falsevalue="false" + label="Trim sequence by quality score?" help="By default, a 3'-end trimming is made to remove ends with a minimum quality score over 5 bp below 20"/> <when value="true"> <conditional name="left_quality_trimming_treatments"> <param name='apply_left_quality_trimming_treatments' - type='boolean' checked="false" label="Trim sequence + type='boolean' checked="false" truevalue="true" + falsevalue="false" label="Trim sequence by quality score from the 5'-end?" 
help="By default, no 5'-end quality trimming is made"/> <when value="true"> <param name="left_quality_trimming_treatment_value" type="integer" min="0" max="40" value="20" label="Quality score threshold to - trim positions" help=""/> + trim positions" help="(-trim_qual_left)"/> </when> + <when value="false" /> </conditional> <conditional name="right_quality_trimming_treatments"> <param name='apply_right_quality_trimming_treatments' - type='boolean' checked="true" label="Trim sequence + type='boolean' checked="true" truevalue="true" + falsevalue="false" label="Trim sequence by quality score from the 3'-end?" help="By default, 3'-end trimming is made based on a score of 20"/> <when value="true"> <param name="right_quality_trimming_treatment_value" type="integer" min="0" max="40" value="20" label="Quality score threshold to - trim positions" help=""/> + trim positions" help="(-trim_qual_right)"/> </when> + <when value="false" /> </conditional> <param name="type_quality_trimming_treatments" type="select" display="radio" label="Type of quality score calculation - to use" help="By default, min is used"> + to use" help="By default, min is used. (-trim_qual_type)"> <option value="min" selected="true">Mininum</option> <option value="mean" >Mean</option> <option value="max" >Max</option> @@ -589,7 +707,7 @@ <param name="rule_quality_trimming_treatments" type="select" display="radio" label="Rule tu use to compare quality score to calculated value" help="By default, 'less than' - is used"> + is used. (-trim_qual_rule)"> <option value="lt" selected="true">Less than</option> <option value="gt" >Greater than</option> <option value="et" >Equal to</option> @@ -598,26 +716,81 @@ min="0" max="300" value="1" label="Size of the sliding window used to calculated quality score by type" help="To stop at the first base that fails the rule defined, use a - window size of 1 (default value)"/> + window size of 1 (default value). 
(-trim_qual_window)"/> <param name="step_quality_trimming_treatments" type="integer" min="0" max="300" value="1" label="Step size used to move the sliding window" help="To move the window over all quality scores without missing any, the step size sould be less or equal to the window size. The default value - is 1."/> + is 1. (-trim_qual_step)"/> </when> + <when value="false" /> </conditional> </when> + <when value="false" /> </conditional> + + <param name="graph_stats" type="select" multiple="True" label="Which statistics should be calculated included in the graph_data file" help="(-graph_stats)"> + <option value="ld" selected="True">Length distribution</option> + <option value="gc" selected="True">GC content distribution</option> + <option value="qd" selected="True">Base quality distribution</option> + <option value="ns" selected="True">Occurence of N</option> + <option value="pt" selected="True">Poly-A/T tails</option> + <option value="ts" selected="True">Tag sequence check</option> + <option value="aq" selected="True">Assembly quality measure</option> + <option value="de" selected="True">Sequence duplication - exact only</option> + <option value="da" selected="True">Sequence duplication - exact + 5'/3'</option> + <option value="sc" selected="True">Sequence complexity</option> + <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option> + </param> </inputs> <outputs> <data format="fastq" name="good_sequence_file" from_work_dir="good_sequences.fastq" - label="Good sequences of ${on_string} (PRINSEQ)" /> + label="Good sequences of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "single"</filter> + </data> <data format="fastq" name="rejected_sequence_file" from_work_dir="rejected_sequences.fastq" - label="Rejected sequences of ${on_string} (PRINSEQ)" /> + label="Rejected sequences of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "single"</filter> + </data> + + <data format="fastq" 
name="good_sequences_1_file" + from_work_dir="good_sequences_1.fastq" + label="Good sequences for R1 of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "paired"</filter> + </data> + <data format="fastq" name="good_sequences_1_singletons_file" + from_work_dir="good_sequences_1_singletons.fastq" + label="Good singleton sequences for R1 of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "paired"</filter> + </data> + <data format="fastq" name="rejected_sequence_1_file" + from_work_dir="rejected_sequences_1.fastq" + label="Rejected sequences for R1 of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "paired"</filter> + </data> + <data format="fastq" name="good_sequences_2_file" + from_work_dir="good_sequences_2.fastq" + label="Good sequences for R2 of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "paired"</filter> + </data> + <data format="fastq" name="good_sequences_2_singletons_file" + from_work_dir="good_sequences_2_singletons.fastq" + label="Good singleton sequences for R2 of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "paired"</filter> + </data> + <data format="fastq" name="rejected_sequence_2_file" + from_work_dir="rejected_sequences_2.fastq" + label="Rejected sequences for R2 of ${on_string} (PRINSEQ)" > + <filter>seq_type['seq_type_opt'] == "paired"</filter> + </data> + + <data format="html" name="html_file" + from_work_dir="stats_html.html" + label="Summary of ${tool.name} on ${on_string} summary" /> </outputs> <tests> @@ -682,16 +855,20 @@ **Parameters** -The parameters are numerous in PRINSEQ +The parameters are numerous in PRINSEQ given the wanted treatments. 
+ +Several filter treatments are proposed: - - Filtering parameters to eliminate sequences - - Filters based on sequence length - - Filters based on quality score - - Filters based on base content - - Trimming parameters to eliminate sequence parts - - Trim of ends - - Trim of tails - - Trim based quality score + - Filters based on sequence length + - Filters based on quality score + - Filters based on base content + +And several trimming treatments eliminate sequence parts: + - Trim of ends + - Trim of tails + - Trim based on quality score + +All these treatments can be customized using the proposed parameters. ----- @@ -706,4 +883,4 @@ <citations> <citation type="doi">10.1093/bioinformatics/btr026</citation> </citations> -</tool> \ No newline at end of file +</tool>
--- a/tool_dependencies.xml Tue Nov 17 06:02:09 2015 -0500 +++ b/tool_dependencies.xml Wed Jan 27 03:55:09 2016 -0500 @@ -3,11 +3,15 @@ <package name="prinseq" version="0.20.4"> <install version="1.0"> <actions> - <action type="download_by_url">http://downloads.sourceforge.net/project/prinseq/standalone/prinseq-lite-0.20.4.tar.gz</action> + <action sha256sum="9b5e0dce3b7f02f09e1cc7e8a2dd77c0b133e5e35529d570ee901f53ebfeb56f" type="download_by_url">http://downloads.sourceforge.net/project/prinseq/standalone/prinseq-lite-0.20.4.tar.gz</action> <action type="move_file"> <source>prinseq-lite.pl</source> <destination>$INSTALL_DIR</destination> </action> + <action type="move_file"> + <source>prinseq-graphs-noPCA.pl</source> + <destination>$INSTALL_DIR</destination> + </action> <action type="set_environment"> <environment_variable name="PRINSEQ_DIR" action="set_to">$INSTALL_DIR/</environment_variable> <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>