Mercurial > repos > pjbriggs > pal_finder
comparison pal_finder_wrapper.xml @ 15:a3af1ff4cad1 draft
pal_finder 0.02.04.7 for testing.
author | pjbriggs |
---|---|
date | Mon, 14 May 2018 11:10:19 -0400 |
parents | 3f8bf1a0403b |
children | f7d63032217b |
comparison
equal
deleted
inserted
replaced
14:3f8bf1a0403b | 15:a3af1ff4cad1 |
---|---|
7 <requirement type="package" version="0.02.04">pal_finder</requirement> | 7 <requirement type="package" version="0.02.04">pal_finder</requirement> |
8 <requirement type="package" version="2.7">python</requirement> | 8 <requirement type="package" version="2.7">python</requirement> |
9 <requirement type="package" version="1.65">biopython</requirement> | 9 <requirement type="package" version="1.65">biopython</requirement> |
10 <requirement type="package" version="2.8.1">pandaseq</requirement> | 10 <requirement type="package" version="2.8.1">pandaseq</requirement> |
11 </requirements> | 11 </requirements> |
12 <command><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
13 @CONDA_PAL_FINDER_SCRIPT_DIR@ && | 13 @CONDA_PAL_FINDER_SCRIPT_DIR@ && |
14 @CONDA_PAL_FINDER_DATA_DIR@ && | 14 @CONDA_PAL_FINDER_DATA_DIR@ && |
15 bash $__tool_directory__/pal_finder_wrapper.sh | 15 bash $__tool_directory__/pal_finder_wrapper.sh |
16 #if str( $platform.platform_type ) == "illumina" | 16 #if str( $platform.platform_type ) == "illumina" |
17 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type | 17 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type |
61 --filter_microsats "$output_filtered_microsats" | 61 --filter_microsats "$output_filtered_microsats" |
62 #end for | 62 #end for |
63 #end if | 63 #end if |
64 #if str( $platform.assembly ) == '-assembly' | 64 #if str( $platform.assembly ) == '-assembly' |
65 $platform.assembly "$output_assembly" | 65 $platform.assembly "$output_assembly" |
66 #end if | |
67 #set $use_all_reads = $platform.subset_conditional.use_all_reads | |
68 #if str( $use_all_reads ) != "yes" | |
69 --subset "$platform.subset_conditional.subset" | |
66 #end if | 70 #end if |
67 #end if | 71 #end if |
68 ]]></command> | 72 ]]></command> |
69 <inputs> | 73 <inputs> |
70 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> | 74 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> |
89 <param name="input_fastq_pair" format="fastqsanger" | 93 <param name="input_fastq_pair" format="fastqsanger" |
90 type="data_collection" collection_type="paired" | 94 type="data_collection" collection_type="paired" |
91 label="Select FASTQ dataset collection with R1/R2 pair" /> | 95 label="Select FASTQ dataset collection with R1/R2 pair" /> |
92 </when> | 96 </when> |
93 </conditional> | 97 </conditional> |
98 <conditional name="subset_conditional"> | |
99 <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" /> | |
100 <when value="no"> | |
101 <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 0.5 to select 50% of reads)" /> | |
102 </when> | |
103 <when value="yes" /> | |
104 </conditional> | |
94 <param name="filters" type="select" display="checkboxes" | 105 <param name="filters" type="select" display="checkboxes" |
95 multiple="True" label="Filters to apply to the pal_finder results" | 106 multiple="True" label="Filters to apply to the pal_finder results" |
96 help="Apply none, one or more filters to refine results"> | 107 help="Apply none, one or more filters to refine results"> |
97 <option value="-primers" selected="True">Only include loci with designed primers</option> | 108 <option value="-primers" selected="True">Only include loci with designed primers</option> |
98 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option> | 109 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option> |
104 </when> | 115 </when> |
105 <when value="454"> | 116 <when value="454"> |
106 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> | 117 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> |
107 </when> | 118 </when> |
108 </conditional> | 119 </conditional> |
109 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" /> | 120 <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" /> |
110 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | 121 <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> |
111 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | 122 <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> |
112 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | 123 <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> |
113 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> | 124 <param name="min_6mer_repeats" type="integer" value="0" label="Minimum number of 6-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" /> |
114 <conditional name="mispriming"> | 125 <conditional name="mispriming"> |
156 help="Temperature should be in degrees Celsius" /> | 167 help="Temperature should be in degrees Celsius" /> |
157 <param name="primer_pair_max_diff_tm" type="float" value="2.0" | 168 <param name="primer_pair_max_diff_tm" type="float" value="2.0" |
158 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" | 169 label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)" |
159 help="Temperature should be in degrees Celsius" /> | 170 help="Temperature should be in degrees Celsius" /> |
160 </when> | 171 </when> |
172 <when value="default" /> | |
161 </conditional> | 173 </conditional> |
162 <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer ranges" help="Can be used to screen input Fastqs" /> | 174 <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " /> |
163 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" | 175 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" |
164 label="Output the config file to the history" | 176 label="Output the config file to the history" |
165 help="Can be used to run pal_finder outside of Galaxy" /> | 177 help="Can be used to run pal_finder outside of Galaxy" /> |
166 </inputs> | 178 </inputs> |
167 <outputs> | 179 <outputs> |
252 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | 264 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> |
253 <expand macro="output_illumina_microsat_summary" /> | 265 <expand macro="output_illumina_microsat_summary" /> |
254 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> | 266 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> |
255 <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> | 267 <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" /> |
256 </test> | 268 </test> |
257 <!-- Test with Illumina input generating bad primer ranges | 269 <!-- Test with Illumina input using subset of reads --> |
258 --> | |
259 <test> | 270 <test> |
260 <param name="platform_type" value="illumina" /> | 271 <param name="platform_type" value="illumina" /> |
261 <param name="filters" value="" /> | 272 <param name="filters" value="" /> |
262 <param name="assembly" value="false" /> | 273 <param name="assembly" value="false" /> |
263 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> | 274 <param name="use_all_reads" value="no" /> |
264 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> | 275 <param name="subset" value="0.5" /> |
265 <param name="output_bad_primer_read_ids" value="true" /> | 276 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> |
266 <expand macro="output_illumina_microsat_summary" /> | 277 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> |
267 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" /> | 278 <expand macro="output_illumina_microsat_subset_summary" /> |
268 <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_ids.out" /> | 279 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_subset.out.re_match" /> |
280 </test> | |
281 <!-- Test with Illumina input where pal_finder doesn't find any | |
282 microsatellites --> | |
283 <test expect_failure="true"> | |
284 <param name="platform_type" value="illumina" /> | |
285 <param name="filters" value="" /> | |
286 <param name="assembly" value="false" /> | |
287 <param name="min_2mer_repeats" value="8" /> | |
288 <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" /> | |
289 <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" /> | |
290 <assert_stderr> | |
291 <has_text text="pal_finder failed to locate any microsatellites" /> | |
292 </assert_stderr> | |
293 </test> | |
294 <!-- Test with Illumina input generating bad ranges --> | |
295 <test> | |
296 <param name="platform_type" value="illumina" /> | |
297 <param name="filters" value="" /> | |
298 <param name="assembly" value="false" /> | |
299 <param name="min_2mer_repeats" value="8" /> | |
300 <param name="input_fastq_r1" value="illuminaPE_r1_bad_ranges.fq" ftype="fastqsanger" /> | |
301 <param name="input_fastq_r2" value="illuminaPE_r2_bad_ranges.fq" ftype="fastqsanger" /> | |
302 <param name="min_2mer_repeats" value="8" /> | |
303 <param name="min_3mer_repeats" value="8" /> | |
304 <param name="min_4mer_repeats" value="8" /> | |
305 <param name="min_5mer_repeats" value="8" /> | |
306 <param name="min_6mer_repeats" value="8" /> | |
307 <param name="primer_options" value="custom" /> | |
308 <param name="primer_opt_size" value="25" /> | |
309 <param name="primer_min_size" value="21" /> | |
310 <param name="primer_max_size" value="30" /> | |
311 <param name="primer_min_gc" value="40.0" /> | |
312 <param name="primer_max_gc" value="60.0" /> | |
313 <param name="primer_gc_clamp" value="3" /> | |
314 <param name="primer_max_end_gc" value="5" /> | |
315 <param name="primer_min_tm" value="60.0" /> | |
316 <param name="primer_max_tm" value="80.0" /> | |
317 <param name="primer_opt_tm" value="68.0" /> | |
318 <param name="primer_pair_max_diff_tm" value="3.0" /> | |
319 <param name="report_bad_primer_ranges" value="true" /> | |
320 <expand macro="output_illumina_microsat_summary_bad_ranges" /> | |
321 <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" /> | |
322 <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" /> | |
323 </test> | |
324 <!-- Test with bad n-mers specified --> | |
325 <test expect_failure="true"> | |
326 <param name="platform_type" value="illumina" /> | |
327 <param name="filters" value="" /> | |
328 <param name="assembly" value="false" /> | |
329 <param name="min_2mer_repeats" value="8" /> | |
330 <param name="min_3mer_repeats" value="8" /> | |
331 <param name="min_4mer_repeats" value="0" /> | |
332 <param name="min_5mer_repeats" value="8" /> | |
333 <param name="min_6mer_repeats" value="8" /> | |
334 <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" /> | |
335 <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" /> | |
336 <assert_stderr> | |
337 <has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" /> | |
338 </assert_stderr> | |
269 </test> | 339 </test> |
270 <!-- Test with 454 input --> | 340 <!-- Test with 454 input --> |
271 <test> | 341 <test> |
272 <param name="platform_type" value="454" /> | 342 <param name="platform_type" value="454" /> |
273 <param name="input_fasta" value="454_in.fa" ftype="fasta" /> | 343 <param name="input_fasta" value="454_in.fa" ftype="fasta" /> |
298 primer3_core can be found in the Primer3 manual at | 368 primer3_core can be found in the Primer3 manual at |
299 http://primer3.sourceforge.net/primer3_manual.htm | 369 http://primer3.sourceforge.net/primer3_manual.htm |
300 | 370 |
301 ------------- | 371 ------------- |
302 | 372 |
373 .. class:: infomark | |
374 | |
375 **Known issues** | |
376 | |
303 .. class:: warning | 377 .. class:: warning |
304 | 378 |
305 **Known problems** | 379 **Low number of reads used for microsatellite detection/bad primer product size ranges** |
306 | |
307 .. class:: infomark | |
308 | |
309 **Bad primer product size ranges** | |
310 | 380 |
311 For some datasets pal_finder may generate 'bad' product size ranges (where the | 381 For some datasets pal_finder may generate 'bad' product size ranges (where the |
312 lower limit exceeds the upper limit) for one or more reads, for input into | 382 lower limit exceeds the upper limit) for one or more reads, for input into |
313 primer3_core. | 383 primer3_core. In these cases primer3_core will terminate prematurely, which can |
314 | 384 result in a substantially lower number of reads being used for microsatellite |
315 If this occurs then the tool will terminate with an error. A list of the reads | 385 detection and potentially sub-optimal primer design. |
316 for which the bad ranges were generated can be found in the error message | 386 |
317 which can be accessed via the 'bug' icon from a failed dataset. | 387 The number of reads generating the bad size ranges is reported in the |
318 | 388 *Summary of microsat types* output dataset as 'readsWithBadRanges'. Ideally |
319 The conditions which cause this error are unclear. However we believe it to be | 389 the reported value should be zero. |
320 associated with short or low quality reads. It is recommended that the input | 390 |
321 data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool) | 391 The conditions which cause this issue within pal_finder are still unclear; |
322 before rerunning pal_finder. | 392 however, we believe it to be associated with short or low quality reads. If this |
393 problem affects your data then: | |
394 | |
395 * Ensure that the input data are sufficiently trimmed and filtered (using | |
396 e.g. the Trimmomatic tool) before rerunning pal_finder. | |
397 | |
398 * A list of read IDs for which pal_finder generates bad product size ranges can | |
399 be output by turning on *Output IDs for input reads which generate bad primer | |
400 ranges*. This outputs an additional dataset with a list of read IDs which can | |
401 be used to remove read pairs from the input Fastq files (using e.g. the *Filter | |
402 sequences by ID* tool) before rerunning pal_finder. | |
403 | |
404 .. class:: warning | |
405 | |
406 **Pal_finder takes a long time to run for large input datasets** | |
407 | |
408 pal_finder was originally developed using MiSeq data, and is not optimised for | |
409 working with the larger Fastqs that are output from other platforms such as | |
410 HiSeq and NextSeq. As a consequence pal_finder may take a very long time to | |
411 complete when operating on larger datasets. | |
412 | |
413 If this is a problem then the tool can be run using a subset of the input reads | |
414 by unchecking the *Use all reads...* option and entering either an integer number | |
415 of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads). | |
323 | 416 |
324 ------------- | 417 ------------- |
325 | 418 |
326 .. class:: infomark | 419 .. class:: infomark |
327 | 420 |