changeset 11:2b63b5400447 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/unicycler commit bc948510a50635ce98123c9e09ae09b39f1e5cee
author iuc
date Sun, 07 Jul 2024 19:46:32 +0000
parents 906f76188535
children 0ea8ca62e8e6
files test-data/phix__spades_graph.gfa1 unicycler.xml
diffstat 2 files changed, 98 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phix__spades_graph.gfa1	Sun Jul 07 19:46:32 2024 +0000
@@ -0,0 +1,5 @@
+S	1	CAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGTCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCG
CAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCAGAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTAGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCT
CCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGG	LN:i:5513	dp:f:1.0
+S	2	CCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCCCCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAACAATTTAGACATGGCGCCACCAGCAAGAGCAGAAGCAATACCGCCAGCAATAGCACCAAACATAAATCACCTCACTTAAGTGGCTG	LN:i:166	dp:f:0.0848319778258616
+L	1	+	1	+	127M
+L	2	+	1	-	127M
+i	367.256	37.4267
--- a/unicycler.xml	Fri Oct 21 16:02:10 2022 +0000
+++ b/unicycler.xml	Sun Jul 07 19:46:32 2024 +0000
@@ -2,65 +2,67 @@
 <description>pipeline for bacterial genomes</description>
     <macros>
         <token name="@TOOL_VERSION@">0.5.0</token>
-        <token name="@VERSION_SUFFIX@">1</token>
+        <token name="@VERSION_SUFFIX@">2</token>
     </macros>
-    <xrefs>
-        <xref type="bio.tools">unicycler</xref>
-    </xrefs>
     <edam_topics>
         <edam_topic>topic_0196</edam_topic>
     </edam_topics>
     <edam_operations>
         <edam_operation>operation_0525</edam_operation>
     </edam_operations>
+    <xrefs>
+        <xref type="bio.tools">unicycler</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">unicycler</requirement>
         <requirement type="package" version="1.15.1">samtools</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
+## Link each reused checkpoint graph into the working directory as <checkpoint>.gfa
+#for r in $reuse
+    ln -s '${r.reuse_file}' '${r.reuse_step}.gfa' &&
+#end for
 ## Preparing files
-#set $uncompressed = ('fastqsanger','fastq')
-#set $compressed = ('fastqsanger.gz','fastq.gz')
 #if str( $paired_unpaired.fastq_input_selector ) == "paired"
-    #if $paired_unpaired.fastq_input1.file_ext in $uncompressed
+    #if $paired_unpaired.fastq_input1.is_of_type("fastq.gz"):
+        #set fq1 = "fq1.fastq.gz"
+    #else
         #set fq1 = "fq1.fastq"
-    #elif $paired_unpaired.fastq_input1.file_ext in $compressed
-        #set fq1 = "fq1.fastq.gz"
     #end if
-    #if $paired_unpaired.fastq_input2.file_ext in $uncompressed
+    #if $paired_unpaired.fastq_input2.is_of_type("fastq.gz"):
+        #set fq2 = "fq2.fastq.gz"
+    #else
         #set fq2 = "fq2.fastq"
-    #elif $paired_unpaired.fastq_input2.file_ext in $compressed
-        #set fq2 = "fq2.fastq.gz"
     #end if
     ln -s '${paired_unpaired.fastq_input1}' $fq1 &&
     ln -s '${paired_unpaired.fastq_input2}' $fq2 &&
 #elif str( $paired_unpaired.fastq_input_selector ) == "paired_collection"
-    #if $paired_unpaired.fastq_input1.forward.file_ext in $uncompressed
+    #if $paired_unpaired.fastq_input1.forward.is_of_type("fastq.gz"):
+        #set fq1 = "fq1.fastq.gz"
+    #else
         #set fq1 = "fq1.fastq"
-    #elif $paired_unpaired.fastq_input1.forward.file_ext in $compressed
-        #set fq1 = "fq1.fastq.gz"
     #end if
-    #if $paired_unpaired.fastq_input1.reverse.file_ext in $uncompressed
+    #if $paired_unpaired.fastq_input1.reverse.is_of_type("fastq.gz"):
+        #set fq2 = "fq2.fastq.gz"
+    #else
         #set fq2 = "fq2.fastq"
-    #elif $paired_unpaired.fastq_input1.reverse.file_ext in $compressed
-        #set fq2 = "fq2.fastq.gz"
     #end if
     ln -s '${paired_unpaired.fastq_input1.forward}' $fq1 &&
     ln -s '${paired_unpaired.fastq_input1.reverse}' $fq2 &&
 #elif str( $paired_unpaired.fastq_input_selector ) == "single"
-    #if $paired_unpaired.fastq_input1.file_ext in $uncompressed
+    #if $paired_unpaired.fastq_input1.is_of_type("fastq.gz"):
+        #set fq = "fq.fastq.gz"
+    #else
         #set fq = "fq.fastq"
-    #elif $paired_unpaired.fastq_input1.file_ext in $compressed
-        #set fq = "fq.fastq.gz"
     #end if
     ln -s '${paired_unpaired.fastq_input1}' '$fq' &&
 #end if
 #if $long
-    #if $long.file_ext in $uncompressed
+    #if $long.is_of_type("fastq"):
         #set lr = "lr.fastq"
-    #elif $long.file_ext in $compressed
+    #elif $long.is_of_type("fastq.gz"):
         #set lr = "lr.fastq.gz"
-    #elif $long.is_of_type('fasta')
+    #elif $long.is_of_type("fasta")
         #set lr = "lr.fasta"
     #end if
     ln -s '${long}' '$lr' &&
@@ -208,12 +210,20 @@
             <option value="1" selected="true">1: save graphs at main checkpoints</option>
             <option value="2">2: also keep SAM</option>
         </param>
+        <repeat name="reuse" title="Reuse checkpoint files from earlier runs" max="1" help="Supply a graph (GFA) produced by an earlier run, e.g. from its SPAdes graphs collection, and select the checkpoint it corresponds to. The file is linked into the working directory under the selected checkpoint name.">
+            <param name="reuse_file" type="data" optional="false" format="gfa1" label="Checkpoint file"/>
+            <param name="reuse_step" type="select" label="Checkpoint">
+                <option value="002_depth_filter">002_depth_filter</option>
+                <option value="003_overlaps_removed">003_overlaps_removed</option>
+                <option value="004_bridges_applied">004_bridges_applied</option>
+            </param>
+        </repeat>
     </inputs>
     <outputs>
         <data name="assembly_graph" format="gfa1" from_work_dir="assembly.gfa" label="${tool.name} on ${on_string}: Final Assembly Graph" />
         <data name="assembly" format="fasta" from_work_dir="assembly.fasta" label="${tool.name} on ${on_string}: Final Assembly"/>
         <collection name="spades_collection" type="list" label="${tool.name} on ${on_string}: SPAdes graphs">
-            <discover_datasets pattern="__designation_and_ext__" format="gfa1" directory="spades_graphs"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gfa" format="gfa1" directory="spades_graphs"/>
             <filter>keep != "0"</filter>
         </collection>
         <data name="bam_file" format="bam" from_work_dir="read_alignment/long_read_alignments.bam" label="${tool.name} on ${on_string}: Long read alignments BAM">
@@ -375,6 +385,35 @@
                 </assert_contents>
             </output>
         </test>
+        <!-- Test reuse of a checkpoint graph (002_depth_filter) supplied as an input.
+             TODO: make the assertions more precise and check the difference to a run without reuse. -->
+        <test expect_num_outputs="2">
+            <conditional name="paired_unpaired">
+                <param name="fastq_input_selector" value="paired_collection"/>
+                <param name="fastq_input1">
+                    <collection type="paired">
+                        <element name="forward" value="phix_f.fq.gz" ftype="fastqsanger" />
+                        <element name="reverse" value="phix_r.fq.gz" ftype="fastqsanger" />
+                    </collection>
+                </param>
+            </conditional>
+            <param name="long" value="only_long.fasta" ftype="fasta" />
+            <repeat name="reuse">
+                <param name="reuse_file" value="phix__spades_graph.gfa1"/>
+                <param name="reuse_step" value="002_depth_filter"/>
+            </repeat>
+            <param name="keep" value="0"/>
+            <output name="assembly_graph" ftype="gfa1">
+                <assert_contents>
+                    <has_text text="S" />
+                </assert_contents>
+            </output>
+            <output name="assembly" ftype="fasta">
+                <assert_contents>
+                    <has_text text=">1" />
+                </assert_contents>
+            </output>
+        </test>
         <!-- Test keep value = 1 -->
         <test expect_num_outputs="3">
             <conditional name="paired_unpaired">
@@ -428,6 +467,35 @@
                         <has_text text="TTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAAC"/>
                     </assert_contents>
                 </element>
+                <!-- There are GFA files for more k values that are not tested explicitly.
+                     The aim of testing the elements below is to pin the names of the graphs,
+                     since these names are used for checkpoint reuse. Hence, if a name changes
+                     here, update the reuse checkpoint options accordingly. -->
+                <element name="001_spades_graph_k127">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="002_depth_filter">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="003_overlaps_removed">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="004_bridges_applied">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
+                <element name="005_final_clean">
+                    <assert_contents>
+                        <has_line_matching expression="^S.*"/>
+                    </assert_contents>
+                </element>
             </output_collection>
             <output name="bam_file" ftype="bam">
                 <assert_contents>