comparison pal_finder_wrapper.xml @ 15:a3af1ff4cad1 draft

pal_finder 0.02.04.7 for testing.
author pjbriggs
date Mon, 14 May 2018 11:10:19 -0400
parents 3f8bf1a0403b
children f7d63032217b
comparison
equal deleted inserted replaced
14:3f8bf1a0403b 15:a3af1ff4cad1
7 <requirement type="package" version="0.02.04">pal_finder</requirement> 7 <requirement type="package" version="0.02.04">pal_finder</requirement>
8 <requirement type="package" version="2.7">python</requirement> 8 <requirement type="package" version="2.7">python</requirement>
9 <requirement type="package" version="1.65">biopython</requirement> 9 <requirement type="package" version="1.65">biopython</requirement>
10 <requirement type="package" version="2.8.1">pandaseq</requirement> 10 <requirement type="package" version="2.8.1">pandaseq</requirement>
11 </requirements> 11 </requirements>
12 <command><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 @CONDA_PAL_FINDER_SCRIPT_DIR@ && 13 @CONDA_PAL_FINDER_SCRIPT_DIR@ &&
14 @CONDA_PAL_FINDER_DATA_DIR@ && 14 @CONDA_PAL_FINDER_DATA_DIR@ &&
15 bash $__tool_directory__/pal_finder_wrapper.sh 15 bash $__tool_directory__/pal_finder_wrapper.sh
16 #if str( $platform.platform_type ) == "illumina" 16 #if str( $platform.platform_type ) == "illumina"
17 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type 17 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type
61 --filter_microsats "$output_filtered_microsats" 61 --filter_microsats "$output_filtered_microsats"
62 #end for 62 #end for
63 #end if 63 #end if
64 #if str( $platform.assembly ) == '-assembly' 64 #if str( $platform.assembly ) == '-assembly'
65 $platform.assembly "$output_assembly" 65 $platform.assembly "$output_assembly"
66 #end if
67 #set $use_all_reads = $platform.subset_conditional.use_all_reads
68 #if str( $use_all_reads ) != "yes"
69 --subset "$platform.subset_conditional.subset"
66 #end if 70 #end if
67 #end if 71 #end if
68 ]]></command> 72 ]]></command>
69 <inputs> 73 <inputs>
70 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> 74 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" />
89 <param name="input_fastq_pair" format="fastqsanger" 93 <param name="input_fastq_pair" format="fastqsanger"
90 type="data_collection" collection_type="paired" 94 type="data_collection" collection_type="paired"
91 label="Select FASTQ dataset collection with R1/R2 pair" /> 95 label="Select FASTQ dataset collection with R1/R2 pair" />
92 </when> 96 </when>
93 </conditional> 97 </conditional>
98 <conditional name="subset_conditional">
99 <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" />
100 <when value="no">
101 <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 0.5 to select 50% of reads)" />
102 </when>
103 <when value="yes" />
104 </conditional>
94 <param name="filters" type="select" display="checkboxes" 105 <param name="filters" type="select" display="checkboxes"
95 multiple="True" label="Filters to apply to the pal_finder results" 106 multiple="True" label="Filters to apply to the pal_finder results"
96 help="Apply none, one or more filters to refine results"> 107 help="Apply none, one or more filters to refine results">
97 <option value="-primers" selected="True">Only include loci with designed primers</option> 108 <option value="-primers" selected="True">Only include loci with designed primers</option>
98 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option> 109 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option>
104 </when> 115 </when>
105 <when value="454"> 116 <when value="454">
106 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> 117 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" />
107 </when> 118 </when>
108 </conditional> 119 </conditional>
109 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" /> 120 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" />
110 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 121 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
111 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 122 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
112 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 123 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
113 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> 124 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />
114 <conditional name="mispriming"> 125 <conditional name="mispriming">
156 help="Temperature should be in degrees Celsius" /> 167 help="Temperature should be in degrees Celsius" />
157 <param name="primer_pair_max_diff_tm" type="float" value="2.0" 168 <param name="primer_pair_max_diff_tm" type="float" value="2.0"
158 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" 169 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)"
159 help="Temperature should be in degrees Celsius" /> 170 help="Temperature should be in degrees Celsius" />
160 </when> 171 </when>
172 <when value="default" />
161 </conditional> 173 </conditional>
162 <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer ranges" help="Can be used to screen input Fastqs" /> 174 <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " />
163 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" 175 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False"
164 label="Output the config file to the history" 176 label="Output the config file to the history"
165 help="Can be used to run pal_finder outside of Galaxy" /> 177 help="Can be used to run pal_finder outside of Galaxy" />
166 </inputs> 178 </inputs>
167 <outputs> 179 <outputs>
252 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> 264 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
253 <expand macro="output_illumina_microsat_summary" /> 265 <expand macro="output_illumina_microsat_summary" />
254 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> 266 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" />
255 <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> 267 <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" />
256 </test> 268 </test>
257 <!-- Test with Illumina input generating bad primer ranges 269 <!-- Test with Illumina input using subset of reads -->
258 -->
259 <test> 270 <test>
260 <param name="platform_type" value="illumina" /> 271 <param name="platform_type" value="illumina" />
261 <param name="filters" value="" /> 272 <param name="filters" value="" />
262 <param name="assembly" value="false" /> 273 <param name="assembly" value="false" />
263 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> 274 <param name="use_all_reads" value="no" />
264 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> 275 <param name="subset" value="0.5" />
265 <param name="output_bad_primer_read_ids" value="true" /> 276 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
266 <expand macro="output_illumina_microsat_summary" /> 277 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
267 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> 278 <expand macro="output_illumina_microsat_subset_summary" />
268 <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_ids.out" /> 279 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_subset.out.re_match" />
280 </test>
281 <!-- Test with Illumina input filter that doesn't find any
282 microsatellites -->
283 <test expect_failure="true">
284 <param name="platform_type" value="illumina" />
285 <param name="filters" value="" />
286 <param name="assembly" value="false" />
287 <param name="min_2mer_repeats" value="8" />
288 <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />
289 <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />
290 <assert_stderr>
291 <has_text text="pal_finder failed to locate any microsatellites" />
292 </assert_stderr>
293 </test>
294 <!-- Test with Illumina input generating bad ranges -->
295 <test>
296 <param name="platform_type" value="illumina" />
297 <param name="filters" value="" />
298 <param name="assembly" value="false" />
299 <param name="min_2mer_repeats" value="8" />
300 <param name="input_fastq_r1" value="illuminaPE_r1_bad_ranges.fq" ftype="fastqsanger" />
301 <param name="input_fastq_r2" value="illuminaPE_r2_bad_ranges.fq" ftype="fastqsanger" />
302 <param name="min_2mer_repeats" value="8" />
303 <param name="min_3mer_repeats" value="8" />
304 <param name="min_4mer_repeats" value="8" />
305 <param name="min_5mer_repeats" value="8" />
306 <param name="min_6mer_repeats" value="8" />
307 <param name="primer_options" value="custom" />
308 <param name="primer_opt_size" value="25" />
309 <param name="primer_min_size" value="21" />
310 <param name="primer_max_size" value="30" />
311 <param name="primer_min_gc" value="40.0" />
312 <param name="primer_max_gc" value="60.0" />
313 <param name="primer_gc_clamp" value="3" />
314 <param name="primer_max_end_gc" value="5" />
315 <param name="primer_min_tm" value="60.0" />
316 <param name="primer_max_tm" value="80.0" />
317 <param name="primer_opt_tm" value="68.0" />
318 <param name="primer_pair_max_diff_tm" value="3.0" />
319 <param name="report_bad_primer_ranges" value="true" />
320 <expand macro="output_illumina_microsat_summary_bad_ranges" />
321 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" />
322 <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" />
323 </test>
324 <!-- Test with bad n-mers specified -->
325 <test expect_failure="true">
326 <param name="platform_type" value="illumina" />
327 <param name="filters" value="" />
328 <param name="assembly" value="false" />
329 <param name="min_2mer_repeats" value="8" />
330 <param name="min_3mer_repeats" value="8" />
331 <param name="min_4mer_repeats" value="0" />
332 <param name="min_5mer_repeats" value="8" />
333 <param name="min_6mer_repeats" value="8" />
334 <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />
335 <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />
336 <assert_stderr>
337 <has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" />
338 </assert_stderr>
269 </test> 339 </test>
270 <!-- Test with 454 input --> 340 <!-- Test with 454 input -->
271 <test> 341 <test>
272 <param name="platform_type" value="454" /> 342 <param name="platform_type" value="454" />
273 <param name="input_fasta" value="454_in.fa" ftype="fasta" /> 343 <param name="input_fasta" value="454_in.fa" ftype="fasta" />
298 primer3_core can be found in the Primer3 manual at 368 primer3_core can be found in the Primer3 manual at
299 http://primer3.sourceforge.net/primer3_manual.htm 369 http://primer3.sourceforge.net/primer3_manual.htm
300 370
301 ------------- 371 -------------
302 372
373 .. class:: infomark
374
375 **Known issues**
376
303 .. class:: warning 377 .. class:: warning
304 378
305 **Known problems** 379 **Low number of reads used for microsatellite detection/bad primer product size ranges**
306
307 .. class:: infomark
308
309 **Bad primer product size ranges**
310 380
311 For some datasets pal_finder may generate 'bad' product size ranges (where the 381 For some datasets pal_finder may generate 'bad' product size ranges (where the
312 lower limit exceeds the upper limit) for one or more reads, for input into 382 lower limit exceeds the upper limit) for one or more reads, for input into
313 primer3_core. 383 primer3_core. In these cases primer3_core will terminate prematurely, which can
314 384 result in a substantially lower number of reads being used for microsatellite
315 If this occurs then the tool will terminate with an error. A list of the reads 385 detection and potentially sub-optimal primer design.
316 for which the bad ranges were generated can be found in the error message 386
317 which can be accessed via the 'bug' icon from a failed dataset. 387 The number of reads generating the bad size ranges is reported in the
318 388 *Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally
319 The conditions which cause this error are unclear. However we believe it to be 389 the reported value should be zero.
320 associated with short or low quality reads. It is recommended that the input 390
321 data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool) 391 The conditions which cause this issue within pal_finder are still unclear,
322 before rerunning pal_finder. 392 however we believe it to be associated with short or low quality reads. If this
393 problem affects your data then:
394
395 * Ensure that the input data are sufficiently trimmed and filtered (using
396 e.g. the Trimmomatic tool) before rerunning pal_finder.
397
398 * A list of read IDs for which pal_finder generates bad product size ranges can
399 be output by turning on *Output IDs for input reads which generate bad primer
400 product size ranges*. This outputs an additional dataset with a list of read IDs which can
401 be used to remove read pairs from the input Fastq files (using e.g. the *Filter
402 sequences by ID* tool) before rerunning pal_finder.
403
404 .. class:: warning
405
406 **Pal_finder takes a long time to run for large input datasets**
407
408 pal_finder was originally developed using MiSeq data, and is not optimised for
409 working with the larger Fastqs that are output from other platforms such as
410 HiSeq and NextSeq. As a consequence pal_finder may take a very long time to
411 complete when operating on larger datasets.
412
413 If this is a problem then the tool can be run using a subset of the input reads
414 by unchecking the *Use all reads...* option and entering either an integer number
415 of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads).
323 416
324 ------------- 417 -------------
325 418
326 .. class:: infomark 419 .. class:: infomark
327 420