changeset 9:d70e33322da3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_markdup commit 9c5a35ce695c3d134e41d8695487edd5f71ea33c
author iuc
date Sun, 08 Sep 2024 03:22:51 +0000
parents 95b3cda386a2
children
files macros.xml samtools_markdup.xml test-data/10_markdup.expected.cram test-data/11_markdup.expected.cram test-data/1_markdup.expected.bam test-data/1_markdup.qname_sorted.bam test-data/2_remove_dups.expected.bam test-data/3_mark_supp_dup.expected.bam test-data/6_markdup.expected.bam test-data/7_markdup.expected.bam test-data/8_markdup.expected.bam test-data/9_markdup.expected.sam test-data/stats.txt
diffstat 13 files changed, 40 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Apr 07 21:46:53 2023 +0000
+++ b/macros.xml	Sun Sep 08 03:22:51 2024 +0000
@@ -5,8 +5,15 @@
             <yield/>
         </requirements>
     </xml>
-    <token name="@TOOL_VERSION@">1.15.1</token>
-    <token name="@PROFILE@">20.05</token>
+    <!-- NOTE: for some tools the TOOL_VERSION token controls only the version
+        of the requirement, not the tool's own version (because the tool's
+        version is ahead of the requirement version). For those tools, please
+        bump only the minor version so that the requirement version can
+        eventually catch up. To find these tools, run:
+        `grep "<tool" . -r | grep -v VERSION_SUFFIX | cut -d":" -f 1` -->
+    <token name="@TOOL_VERSION@">1.20</token>
+    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@PROFILE@">22.05</token>
     <token name="@FLAGS@"><![CDATA[
         #set $flags = 0
         #if $filter
@@ -212,37 +219,7 @@
 
     <xml name="citations">
         <citations>
-            <citation type="bibtex">
-                @misc{SAM_def,
-                title={Definition of SAM/BAM format},
-                url = {https://samtools.github.io/hts-specs/},}
-            </citation>
-            <citation type="doi">10.1093/bioinformatics/btp352</citation>
-            <citation type="doi">10.1093/bioinformatics/btr076</citation>
-            <citation type="doi">10.1093/bioinformatics/btr509</citation>
-            <citation type="bibtex">
-                @misc{Danecek_et_al,
-                Author={Danecek, P., Schiffels, S., Durbin, R.},
-                title={Multiallelic calling model in bcftools (-m)},
-                url = {http://samtools.github.io/bcftools/call-m.pdf},}
-            </citation>
-            <citation type="bibtex">
-                @misc{Durbin_VCQC,
-                Author={Durbin, R.},
-                title={Segregation based metric for variant call QC},
-                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
-            </citation>
-            <citation type="bibtex">
-                @misc{Li_SamMath,
-                Author={Li, H.},
-                title={Mathematical Notes on SAMtools Algorithms},
-                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
-            </citation>
-            <citation type="bibtex">
-                @misc{SamTools_github,
-                title={SAMTools GitHub page},
-                url = {https://github.com/samtools/samtools},}
-            </citation>
+            <citation type="doi">10.1093/gigascience/giab008</citation>
         </citations>
     </xml>
     <xml name="version_command">
--- a/samtools_markdup.xml	Fri Apr 07 21:46:53 2023 +0000
+++ b/samtools_markdup.xml	Sun Sep 08 03:22:51 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@" >
+<tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
     <description>marks duplicate alignments</description>
     <macros>
         <import>macros.xml</import>
@@ -51,7 +51,7 @@
 '$output'
     ]]></command>
     <inputs>
-        <param name="bamfile" type="data" format="sam,bam,cram" optional="false" label="Alignment" />
+        <param name="bamfile" type="data" format="sam,unsorted.bam,cram" optional="false" label="Alignment" />
         <param name="remove" type="boolean" argument="-r" truevalue="-r" falsevalue="" label="Remove duplicate reads" />
         <param name="supp" type="boolean" argument="-S" truevalue="-S" falsevalue="" label="Mark supplementary reads of duplicates as duplicates" />
         <param name="existing_tags" type="boolean" argument="-c" truevalue="-c" falsevalue="" label="Clear previous duplicate settings and tags." />
@@ -108,9 +108,12 @@
         </test>
         <!-- 3) -->
         <test expect_num_outputs="1">
-            <param name="bamfile" value="3_mark_supp_dup.bam" ftype="bam"  />
+            <param name="bamfile" value="3_mark_supp_dup.bam" ftype="bam" />
             <param name="supp" value="-S" />
             <output name="output" file="3_mark_supp_dup.expected.bam" ftype="bam" lines_diff="4" />
+            <assert_command>
+                <has_text text="samtools sort" negate="true"/>
+            </assert_command>
         </test>
         <!-- 4) test stats output -->
         <test expect_num_outputs="2">
@@ -120,17 +123,12 @@
             <output name="stats_output" file="stats.txt" lines_diff="2" />
         </test>
         <!-- 5) check that stderr is not swallowed w test data from fixmate  -->
-        <test expect_num_outputs="2" expect_exit_code="1" expect_failure="true">
+        <test expect_exit_code="1" expect_failure="true">
             <param name="bamfile" value="3_two_read_mapped.sam" ftype="sam" />
             <param name="stats" value="yes"/>
-            <!-- for some reason this is not possible at the moment
-            <output name="stats_output">
-                <assert_contents>
-                    <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/>
-                </assert_contents>
-            </output> -->
             <assert_stderr>
-                <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/>
+                <has_line line="samtools markdup: error, no MC tag. Please run samtools fixmate on file first."/>
+                <has_line line="samtools markdup: error, unable to assign pair hash key."/>
             </assert_stderr>
         </test>
         <!-- 6) check optical distance and check -c option -->
@@ -158,12 +156,27 @@
             <param name="select_oformat" value="SAM" />
             <output name="output" file="9_markdup.expected.sam" ftype="sam" lines_diff="4" />
         </test>
-        <!-- 10) test cram format -->
+        <!-- 10) essentially the same as test 9 (the input was just converted to a sorted bam, but Galaxy is told it is qname sorted)
+                to test the qname sorted bam format and ensure that sorting happens in the tool,
+                i.e. the qname_sorted bam is not converted implicitly -->
+        <test expect_num_outputs="1">
+            <param name="bamfile" value="1_markdup.qname_sorted.bam" ftype="qname_sorted.bam" />
+            <param name="select_oformat" value="SAM" />
+            <output name="output" file="9_markdup.expected.sam" lines_diff="4" />
+            <assert_command>
+                <has_text text="samtools sort"/>
+            </assert_command>
+        </test>
+
+        <!-- 11) test cram format -->
         <test expect_num_outputs="1">
             <param name="bamfile" value="10_markdup.sam" ftype="sam"/>
             <param name="select_oformat" value="CRAM" />
             <param name="ref_file" value="test.fa" />
-            <output name="output" file="10_markdup.expected.cram" ftype="cram" compare="sim_size" delta="250"/>
+            <output name="output" file="11_markdup.expected.cram" ftype="cram" compare="sim_size" delta="250"/>
+            <assert_command>
+                <has_text text="samtools sort"/>
+            </assert_command>
         </test>
     </tests>
     <help>
Binary file test-data/10_markdup.expected.cram has changed
Binary file test-data/11_markdup.expected.cram has changed
Binary file test-data/1_markdup.expected.bam has changed
Binary file test-data/1_markdup.qname_sorted.bam has changed
Binary file test-data/2_remove_dups.expected.bam has changed
Binary file test-data/3_mark_supp_dup.expected.bam has changed
Binary file test-data/6_markdup.expected.bam has changed
Binary file test-data/7_markdup.expected.bam has changed
Binary file test-data/8_markdup.expected.bam has changed
--- a/test-data/9_markdup.expected.sam	Fri Apr 07 21:46:53 2023 +0000
+++ b/test-data/9_markdup.expected.sam	Sun Sep 08 03:22:51 2024 +0000
@@ -1,7 +1,7 @@
 @HD	VN:1.4	SO:coordinate
 @SQ	SN:contig_000000000	LN:11391
-@PG	ID:samtools	PN:samtools	VN:1.12	CL:samtools sort -@ 0 -m 768M -T . -O sam -o coordsort.sam /tmp/tmpuzvw6tdp/files/b/a/d/dataset_badf390c-6dae-4603-937a-f000db10e1eb.dat
-@PG	ID:samtools.1	PN:samtools	PP:samtools	VN:1.12	CL:samtools markdup -@ 0 -m t -O SAM coordsort.sam /tmp/tmpuzvw6tdp/files/c/4/1/dataset_c4121439-d0e4-4e9a-b597-951d44bf57bc.dat
+@PG	ID:samtools	PN:samtools	VN:1.20	CL:samtools sort -@ 0 -m 768M -T /tmp/tmpo1hsz7dc/tmp -O sam -o coordsort.sam /tmp/tmpo1hsz7dc/files/1/a/f/dataset_1afa2f60-62b8-4548-bb89-e30d23322a3e.dat
+@PG	ID:samtools.1	PN:samtools	PP:samtools	VN:1.20	CL:samtools markdup -@ 0 -m t -O SAM coordsort.sam /tmp/tmpo1hsz7dc/job_working_directory/000/20/outputs/dataset_5a651845-a763-4ddc-ac05-3734afca2130.dat
 entry1	1187	contig_000000000	109	60	250M	=	137	278	GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG	1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2@2@2@F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2@G11>22@22@G222201?1??1<?DF1FG1001<1>1F1=11>111>1-....000=0000=00	NM:i:18	AS:i:164	XS:i:0	MQ:i:60	MC:Z:250M	ms:i:6744
 entry2	163	contig_000000000	109	60	250M	=	137	278	TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG	BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFBFF01BGHHHFCFF@1GHHBGGFHFF1?1?FBGGF11FD110FG.<FFGGH1FGH0DG00	NM:i:2	AS:i:240	XS:i:0	MQ:i:60	MC:Z:250M	ms:i:9279
 entry3	99	contig_000000000	116	60	250M	=	222	356	ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG	BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGHHGHHHHGHHHHHHGHBFFHHHHHHHGHHHGHEHHHFFGGHHHHHHHEHHHFH=GHHHF	NM:i:0	AS:i:250	XS:i:0	MQ:i:60	MC:Z:250M	ms:i:9509
--- a/test-data/stats.txt	Fri Apr 07 21:46:53 2023 +0000
+++ b/test-data/stats.txt	Sun Sep 08 03:22:51 2024 +0000
@@ -1,4 +1,4 @@
-COMMAND: samtools markdup -@ 0 -s coordsort.sam /tmp/tmpea8lprdd/files/2/2/4/dataset_224df993-f6bf-4c7d-99a1-2e9dd51c2a57.dat
+COMMAND: samtools markdup -@ 0 -m t -s -f /tmp/tmpo1hsz7dc/job_working_directory/000/8/outputs/dataset_9e69bd59-3165-4ede-92bf-2f438627a6a1.dat -O BAM coordsort.sam /tmp/tmpo1hsz7dc/job_working_directory/000/8/outputs/dataset_ccf59cfe-ffbd-41d5-9412-d7a7ac6e65d9.dat
 READ: 16
 WRITTEN: 16
 EXCLUDED: 3
@@ -14,3 +14,4 @@
 DUPLICATE PRIMARY TOTAL: 6
 DUPLICATE TOTAL: 6
 ESTIMATED_LIBRARY_SIZE: 4
+