comparison polypolish.xml @ 1:df205c588e6f draft default tip

planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/polypolish commit f3b5a8f978923c5bbf663b0a99025245458e6ee0
author iuc
date Tue, 12 Nov 2024 09:56:19 +0000
parents 0c094ece2c73
children
comparison
equal deleted inserted replaced
0:0c094ece2c73 1:df205c588e6f
3 Short-read polishing of long-read bacterial genome assemblies 3 Short-read polishing of long-read bacterial genome assemblies
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macro.xml</import> 6 <import>macro.xml</import>
7 </macros> 7 </macros>
8 <expand macro='xrefs'/> 8 <expand macro="xrefs"/>
9 <expand macro="requirements" /> 9 <expand macro="requirements"/>
10 <expand macro="version_command" /> 10 <expand macro="version_command"/>
11 <command detect_errors="aggressive"><![CDATA[ 11 <command detect_errors="aggressive"><![CDATA[
12 ln -s '$input.fasta_file' input_data && 12 ln -s '$input.fasta_file' input_data &&
13 #*====================================== 13 #*======================================
14 For single SAM 14 For single SAM
15 ======================================*# 15 ======================================*#
16 #if $input.sam_data_type.sam_selector == 'single' 16 #if $input.sam_data_type.sam_selector == 'single'
17 #if $input.sam_data_type.single_sam.ext == 'unsorted.bam' 17 #if $input.sam_data_type.single_sam.is_of_type("unsorted.bam")
18 samtools view -h $input.sam_data_type.single_sam > input_sam && 18 samtools view -@ \$addthreads -h $input.sam_data_type.single_sam > input_sam &&
19 #elif $input.sam_data_type.single_sam.ext == 'sam' 19 #elif $input.sam_data_type.single_sam.ext == 'sam'
20 ln -s $input.sam_data_type.single_sam input_sam && 20 ln -s $input.sam_data_type.single_sam input_sam &&
21 #else
22 echo "${input.sam_data_type.single_sam} not a sam/bam file"
21 #end if 23 #end if
22 polypolish input_data input_sam > '$polished_fasta' 24 polypolish polish input_data input_sam > '$polished_fasta'
23 #*====================================== 25 #*======================================
24 For paired SAM 26 For paired SAM
25 ======================================*# 27 ======================================*#
26 #elif $input.sam_data_type.sam_selector == 'paired' 28 #elif $input.sam_data_type.sam_selector == 'paired'
27 #if $input.sam_data_type.R1_sam.ext == 'unsorted.bam' 29 #if $input.sam_data_type.R1_sam.is_of_type("unsorted.bam")
28 samtools view -h $input.sam_data_type.R1_sam > sample_R1.sam && 30 samtools view -@ \$addthreads -h $input.sam_data_type.R1_sam > sample_R1.sam &&
29 #elif $input.sam_data_type.R1_sam.ext == 'sam' 31 #elif $input.sam_data_type.R1_sam.ext == 'sam'
30 ln -s '$input.sam_data_type.R1_sam' sample_R1.sam && 32 ln -s '$input.sam_data_type.R1_sam' sample_R1.sam &&
33 #else
34 echo "${input.sam_data_type.single_sam} not a sam/bam file"
31 #end if 35 #end if
32 #if $input.sam_data_type.R2_sam.ext == 'unsorted.bam' 36 #if $input.sam_data_type.R2_sam.is_of_type("unsorted.bam")
33 samtools view -h $input.sam_data_type.R2_sam > sample_R2.sam && 37 samtools view -@ \$addthreads -h $input.sam_data_type.R2_sam > sample_R2.sam &&
34 #elif $input.sam_data_type.R2_sam.ext == 'sam' 38 #elif $input.sam_data_type.R2_sam.ext == 'sam'
35 ln -s '$input.sam_data_type.R2_sam' sample_R2.sam && 39 ln -s '$input.sam_data_type.R2_sam' sample_R2.sam &&
40 #else
41 echo "${input.sam_data_type.single_sam} not a sam/bam file"
36 #end if 42 #end if
37 #if $input.sam_data_type.insert_filter.filter_select == 'filter' 43 #if $input.sam_data_type.insert_filter.filter_select == 'filter'
38 polypolish_insert_filter.py --low '$input.sam_data_type.insert_filter.low' 44 polypolish filter --low '$input.sam_data_type.insert_filter.low'
39 --high '$input.sam_data_type.insert_filter.high' 45 --high '$input.sam_data_type.insert_filter.high'
40 --in1 sample_R1.sam 46 --in1 sample_R1.sam
41 --in2 sample_R2.sam 47 --in2 sample_R2.sam
42 --out1 'filtered_1.sam' 48 --out1 'filtered_1.sam'
43 --out2 'filtered_2.sam' && 49 --out2 'filtered_2.sam' &&
44 polypolish input_data 'filtered_1.sam' 'filtered_2.sam' > $polished_fasta 50 polypolish polish input_data 'filtered_1.sam' 'filtered_2.sam' > $polished_fasta
45 #else 51 #else
46 polypolish input_data sample_R1.sam sample_R2.sam > $polished_fasta 52 polypolish polish input_data sample_R1.sam sample_R2.sam > $polished_fasta
47 #end if 53 #end if
48 #*====================================== 54 #*======================================
49 For multiple single-end SAM 55 For multiple single-end SAM
50 ======================================*# 56 ======================================*#
51 #elif $input.sam_data_type.sam_selector == 'multiple_single' 57 #elif $input.sam_data_type.sam_selector == 'multiple_single'
52 mkdir single_collection && 58 mkdir single_collection &&
53 #for $value, $single_sam in enumerate($input.sam_data_type.single_collection): 59 #for $value, $single_sam in enumerate($input.sam_data_type.single_collection):
54 #if $single_sam.ext == 'unsorted.bam' 60 #if $single_sam.is_of_type("unsorted.bam")
55 samtools view -h $single_sam > 'single_collection/$(single_sam.element_identifier).sam' && 61 samtools view -@ \$addthreads -h $single_sam > 'single_collection/$(single_sam.element_identifier).sam' &&
56 #elif $single_sam.ext == 'sam' 62 #elif $single_sam.ext == 'sam'
57 ln -s $single_sam 'single_collection/$(single_sam.element_identifier).$(single_sam.ext)' && 63 ln -s $single_sam 'single_collection/$(single_sam.element_identifier).$(single_sam.ext)' &&
64 #else
65 echo "${single_sam} is not a sam/bam file"
58 #end if 66 #end if
59 #end for 67 #end for
60 polypolish input_data single_collection/*.sam > '$polished_fasta' 68 polypolish polish input_data single_collection/*.sam > '$polished_fasta'
61 #*====================================== 69 #*======================================
62 For multiple paired-end SAM 70 For multiple paired-end SAM
63 ======================================*# 71 ======================================*#
64 #elif $input.sam_data_type.sam_selector == "multiple_paired" 72 #elif $input.sam_data_type.sam_selector == "multiple_paired"
65 mkdir paired_collection && 73 mkdir paired_collection &&
66 #for $value, $paired_sam in enumerate($input.sam_data_type.paired_collection): 74 #for $value, $paired_sam in enumerate($input.sam_data_type.paired_collection):
67 #if $paired_sam.forward.ext == 'unsorted.bam' 75 #if $paired_sam.forward.is_of_type("unsorted.bam")
68 samtools view -h $paired_sam.forward > 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' && 76 samtools view -@ \$addthreads -h $paired_sam.forward > 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' &&
69 #else 77 #else
70 ln -s '$paired_sam.forward' 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' && 78 ln -s '$paired_sam.forward' 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' &&
71 #end if 79 #end if
72 #if $paired_sam.reverse.ext == 'unsorted.bam' 80 #if $paired_sam.reverse.is_of_type("unsorted.bam")
73 samtools view -h $paired_sam.reverse > 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' && 81 samtools view -@ \$addthreads -h $paired_sam.reverse > 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' &&
74 #else 82 #else
75 ln -s '$paired_sam.reverse' 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' && 83 ln -s '$paired_sam.reverse' 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' &&
76 #end if 84 #end if
77 #if $input.sam_data_type.insert_filter.filter_select == 'filter' 85 #if $input.sam_data_type.insert_filter.filter_select == 'filter'
78 polypolish_insert_filter.py --low '$input.sam_data_type.insert_filter.low' 86 polypolish filter --low '$input.sam_data_type.insert_filter.low'
79 --high '$input.sam_data_type.insert_filter.high' 87 --high '$input.sam_data_type.insert_filter.high'
80 --in1 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam' 88 --in1 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier).sam'
81 --in2 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam' 89 --in2 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier).sam'
82 --out1 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier)_filtered.sam' 90 --out1 'paired_collection/forward_input$(value)$(paired_sam.forward.element_identifier)_filtered.sam'
83 --out2 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier)_filtered.sam' && 91 --out2 'paired_collection/reverse_input$(value)$(paired_sam.reverse.element_identifier)_filtered.sam' &&
85 #end for 93 #end for
86 #*====================================== 94 #*======================================
87 Filtering option 95 Filtering option
88 ======================================*# 96 ======================================*#
89 #if $input.sam_data_type.insert_filter.filter_select == 'filter' 97 #if $input.sam_data_type.insert_filter.filter_select == 'filter'
90 polypolish input_data paired_collection/*_filtered.sam > '$polished_fasta' 98 polypolish polish input_data paired_collection/*_filtered.sam > '$polished_fasta'
91 #else 99 #else
92 polypolish input_data paired_collection/*.sam > '$polished_fasta' 100 polypolish polish input_data paired_collection/*.sam > '$polished_fasta'
93 #end if 101 #end if
94 #end if 102 #end if
95 #*====================================== 103 #*======================================
96 For debug file output 104 For debug file output
97 ======================================*# 105 ======================================*#
106 #end if 114 #end if
107 ]]> 115 ]]>
108 </command> 116 </command>
109 <inputs> 117 <inputs>
110 <section name="input" title="Input sequences" expanded="True"> 118 <section name="input" title="Input sequences" expanded="True">
111 <param name="fasta_file" type="data" format="fasta" label="Select a draft genome for polishing" 119 <param name="fasta_file" type="data" format="fasta" label="Select a draft genome for polishing" help="Fasta sequence to be cleaned using short-reads data"/>
112 help="Fasta sequence to be cleaned using short-reads data"/>
113 <conditional name="sam_data_type"> 120 <conditional name="sam_data_type">
114 <param name="sam_selector" type="select" label="Select aligned data to polish" help="Choose number of aligned sam/bam files. Need aligned file with all possible locations in aligner option"> 121 <param name="sam_selector" type="select" label="Select aligned data to polish" help="Choose number of aligned sam/bam files. Need aligned file with all possible locations in aligner option">
115 <option value="single">Single SAM/BAM file</option> 122 <option value="single">Single SAM/BAM file</option>
116 <option value="paired">Paired SAM/BAM files</option> 123 <option value="paired">Paired SAM/BAM files</option>
117 <option value="multiple_single">Multiple single SAM/BAM files</option> 124 <option value="multiple_single">Multiple single SAM/BAM files</option>
133 <expand macro="filter_option"/> 140 <expand macro="filter_option"/>
134 </when> 141 </when>
135 </conditional> 142 </conditional>
136 </section> 143 </section>
137 <section name="options" title="Options" expanded="False"> 144 <section name="options" title="Options" expanded="False">
138 <param name="min_depth" argument="--min_depth" type="integer" min="0" value="5" label="Minimal depth" 145 <param argument="--min_depth" type="integer" min="0" value="5" label="Minimal depth" help="A base must occur at least this many times in the pileup to be considered valid [default: 5]"/>
139 help="A base must occur at least this many times in the pileup to be considered valid [default: 5]"/> 146 <param argument="--fraction_invalid" type="float" min="0" value="0.2" max="1" label="Minimal invalid fraction" help="A base must make up less than this fraction of the read depth to be considered invalid [default: 0.2]"/>
140 <param name="fraction_invalid" argument="--fraction_invalid" type="float" min="0" value="0.2" max="1" label="Minimal invalid fraction" 147 <param argument="--max_errors" type="integer" min="0" value="10" label="Number of mismatch/indels to ignore alignments" help="Ignore alignments with more than this many mismatches and indels [default: 10]"/>
141 help="A base must make up less than this fraction of the read depth to be considered invalid [default: 0.2]"/> 148 <param argument="--fraction_valid" type="float" min="0" value="0.5" max="1" label="Minimal valid fraction" help="A base must make up at least this fraction of the read depth to be considered valid [default: 0.5"/>
142 <param name="max_errors" argument="--max_errors" type="integer" min="0" value="10" label="Number of mismatch/indels to ignore alignments"
143 help="Ignore alignments with more than this many mismatches and indels [default: 10]"/>
144 <param name="fraction_valid" argument="--fraction_valid" type="float" min="0" value="0.5" max="1" label="Minimal valid fraction"
145 help="A base must make up at least this fraction of the read depth to be considered valid [default: 0.5"/>
146 <param name="keep_logfile" type="boolean" truevalue="true" falsevalue="false" label="Keep log file"/> 149 <param name="keep_logfile" type="boolean" truevalue="true" falsevalue="false" label="Keep log file"/>
147 <param name="debug" argument="--debug" type="boolean" truevalue="true" falsevalue="false" label="Keep per base information file"/> 150 <param argument="--debug" type="boolean" truevalue="true" falsevalue="false" label="Keep per base information file"/>
148 </section> 151 </section>
149 </inputs> 152 </inputs>
150 <outputs> 153 <outputs>
151 <data name="polished_fasta" format="fasta" label="${tool.name} on ${on_string}: polished fasta"/> 154 <data name="polished_fasta" format="fasta" label="${tool.name} on ${on_string}: polished fasta"/>
152 <data name="debug_file" format="tabular" label="${tool.name} on ${on_string}: Per base informations"> 155 <data name="debug_file" format="tabular" label="${tool.name} on ${on_string}: Per base informations">
341 <param name="fasta_file" value="contigs.fa"/> 344 <param name="fasta_file" value="contigs.fa"/>
342 <conditional name="sam_data_type"> 345 <conditional name="sam_data_type">
343 <param name="sam_selector" value="multiple_single"/> 346 <param name="sam_selector" value="multiple_single"/>
344 <param name="single_collection"> 347 <param name="single_collection">
345 <collection type="list"> 348 <collection type="list">
346 <element name="R1_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/> 349 <element name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
347 <element name="R1_bis_sam" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/> 350 <element name="R1_bis_sam" value="aligned_test_file/alignement_R1_bis.sam" ftype="sam"/>
348 <element name="R1_ter_sam" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/> 351 <element name="R1_ter_sam" value="aligned_test_file/alignement_R1_ter.sam" ftype="sam"/>
349 </collection> 352 </collection>
350 </param> 353 </param>
351 </conditional> 354 </conditional>
352 </section> 355 </section>
353 <section name="options"> 356 <section name="options">
359 <!-- Test_11 paired-end with bam input --> 362 <!-- Test_11 paired-end with bam input -->
360 <test expect_num_outputs="1"> 363 <test expect_num_outputs="1">
361 <section name="input"> 364 <section name="input">
362 <param name="fasta_file" value="contigs.fa"/> 365 <param name="fasta_file" value="contigs.fa"/>
363 <conditional name="sam_data_type"> 366 <conditional name="sam_data_type">
364 <param name="sam_selector" value="multiple_paired"/> 367 <param name="sam_selector" value="multiple_paired"/>
365 <param name="paired_collection"> 368 <param name="paired_collection">
366 <collection type="list:paired"> 369 <collection type="list:paired">
367 <element name="paired_1"> 370 <element name="paired_1">
368 <collection type="paired"> 371 <collection type="paired">
369 <element name="forward" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/> 372 <element name="forward" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
370 <element name="reverse" value="aligned_test_file/alignement_R2.bam" ftype="unsorted.bam"/> 373 <element name="reverse" value="aligned_test_file/alignement_R2.bam" ftype="unsorted.bam"/>
371 </collection> 374 </collection>
372 </element> 375 </element>
373 <element name="paired_2"> 376 <element name="paired_2">
374 <collection type="paired"> 377 <collection type="paired">
375 <element name="forward" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/> 378 <element name="forward" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/>
376 <element name="reverse" value="aligned_test_file/alignement_R2_bis.bam" ftype="unsorted.bam"/> 379 <element name="reverse" value="aligned_test_file/alignement_R2_bis.bam" ftype="unsorted.bam"/>
377 </collection> 380 </collection>
378 </element> 381 </element>
379 <element name="paired_3"> 382 <element name="paired_3">
380 <collection type="paired"> 383 <collection type="paired">
381 <element name="forward" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/> 384 <element name="forward" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/>
382 <element name="reverse" value="aligned_test_file/alignement_R2_ter.bam" ftype="unsorted.bam"/> 385 <element name="reverse" value="aligned_test_file/alignement_R2_ter.bam" ftype="unsorted.bam"/>
383 </collection> 386 </collection>
384 </element> 387 </element>
385 </collection> 388 </collection>
386 </param> 389 </param>
387 </conditional> 390 </conditional>
388 </section> 391 </section>
389 <section name="options"> 392 <section name="options">
390 <param name="debug" value="false"/> 393 <param name="debug" value="false"/>
391 <param name="keep_logfile" value="false"/> 394 <param name="keep_logfile" value="false"/>
392 </section> 395 </section>
393 <output name="polished_fasta" value="polished.fasta"/> 396 <output name="polished_fasta" value="polished.fasta"/>
397 </test>
398 <!-- Test_12 Test with bam files -->
399 <test expect_num_outputs="1">
400 <section name="input">
401 <param name="fasta_file" value="contigs.fa"/>
402 <conditional name="sam_data_type">
403 <param name="sam_selector" value="single"/>
404 <param name="single_sam" value="aligned_test_file/alignement_R1.bam"/>
405 </conditional>
406 </section>
407 <output name="polished_fasta" value="polished.fasta"/>
408 </test>
409 <!-- Test_13 paired bam -->
410 <test expect_num_outputs="2">
411 <section name="input">
412 <param name="fasta_file" value="contigs.fa"/>
413 <conditional name="sam_data_type">
414 <param name="sam_selector" value="paired"/>
415 <param name="R1_sam" value="aligned_test_file/alignement_R1.bam"/>
416 <param name="R2_sam" value="aligned_test_file/alignement_R2.bam"/>
417 </conditional>
418 </section>
419 <section name="options">
420 <param name="debug" value="true"/>
421 <param name="keep_logfile" value="false"/>
422 </section>
423 <output name="polished_fasta" value="polished.fasta"/>
424 <output name="debug_file" value="debug_file_test_2.tsv"/>
394 </test> 425 </test>
395 </tests> 426 </tests>
396 <help><![CDATA[ 427 <help><![CDATA[
397 **What it does** 428 **What it does**
398 Polypolish is a tool for polishing genome assemblies with short reads. 429 Polypolish is a tool for polishing genome assemblies with short reads.
399 Polypolish uses SAM/BAM files where each read has been aligned to all possible locations (not just a single best location). 430 Polypolish uses SAM/BAM files where each read has been aligned to all possible locations (not just a single best location).
400 This allows it to repair errors in repeat regions that other alignment-based polishers cannot fix. 431 This allows it to repair errors in repeat regions that other alignment-based polishers cannot fix.
401 432
402 **Polypolish pipeline steps** 433 **Polypolish pipeline steps**
403 1. [Optional] Filter aligned reads 434 1. [Optional] Filter aligned reads
404 - Exclude some alignments based on their insert size 435 Exclude some alignments based on their insert size
405 - This should reduce the number of excessive alignments, particularly near the edges of repeat sequences, improving Polypolish's ability to fix errors in those regions. 436 This should reduce the number of excessive alignments,
437 particularly near the edges of repeat sequences, improving Polypolish's ability to fix errors in those regions.
406 2. Clean assembly with filtered reads 438 2. Clean assembly with filtered reads
439
407 440
408 **Inputs** 441 **Inputs**
409 Polypolish needs SAM/BAM input obtained from an aligner run with the option to keep all possible locations 442 Polypolish needs SAM/BAM input obtained from an aligner run with the option to keep all possible locations
410 Polypolish takes one or more assemblies as input fasta. 443 Polypolish takes one or more assemblies as input fasta.
411 It also needs raw read data in single or paired-end SAM/BAM format. 444 It also needs raw read data in single or paired-end SAM/BAM format.
412 You can use multiple aligned data to polish the same assembly. 445 You can use multiple aligned data to polish the same assembly.
413 **WARNING It can only work if multiple location information is available in sam/bam files** 446 **WARNING It can only work if multiple location information is available in sam/bam files**
414 For example using bwa mem to align raw data before use, you need : 447 For example using bwa mem to align raw data before use, you need :
415 1. To align each read file independently (also for paired data) 448 1. To align each read file independently (also for paired data)
416 2. Set the option "Output all alignments for single-ends or unpaired paired-ends" in Select analysis mode>Set input/output options 449 2. Set the option "Output all alignments for single-ends or unpaired paired-ends" in Select analysis mode>Set input/output options
417 - This allows the multiple-alignment output needed to use polypolish 450 This allows the multiple-alignment output needed to use polypolish
418
419
420 ]]></help> 451 ]]></help>
421 <expand macro="citations"/> 452 <expand macro="citations"/>
422 </tool> 453 </tool>