Mercurial > repos > iuc > samtools_markdup
changeset 9:d70e33322da3 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_markdup commit 9c5a35ce695c3d134e41d8695487edd5f71ea33c
| author | iuc |
|---|---|
| date | Sun, 08 Sep 2024 03:22:51 +0000 |
| parents | 95b3cda386a2 |
| children | |
| files | macros.xml samtools_markdup.xml test-data/10_markdup.expected.cram test-data/11_markdup.expected.cram test-data/1_markdup.expected.bam test-data/1_markdup.qname_sorted.bam test-data/2_remove_dups.expected.bam test-data/3_mark_supp_dup.expected.bam test-data/6_markdup.expected.bam test-data/7_markdup.expected.bam test-data/8_markdup.expected.bam test-data/9_markdup.expected.sam test-data/stats.txt |
| diffstat | 13 files changed, 40 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Fri Apr 07 21:46:53 2023 +0000 +++ b/macros.xml Sun Sep 08 03:22:51 2024 +0000 @@ -5,8 +5,15 @@ <yield/> </requirements> </xml> - <token name="@TOOL_VERSION@">1.15.1</token> - <token name="@PROFILE@">20.05</token> + <!-- NOTE: for some tools only the version of the requirement but not the + tool's version is controlled by the TOOL_VERSION token + (because their version is ahead of the requirement version .. + please only bump the minor version in order to let the requirement + version catch up eventually). To find the tools check: + `grep "<tool" . -r | grep -v VERSION_SUFFIX | cut -d":" -f 1` --> + <token name="@TOOL_VERSION@">1.20</token> + <token name="@VERSION_SUFFIX@">2</token> + <token name="@PROFILE@">22.05</token> <token name="@FLAGS@"><![CDATA[ #set $flags = 0 #if $filter @@ -212,37 +219,7 @@ <xml name="citations"> <citations> - <citation type="bibtex"> - @misc{SAM_def, - title={Definition of SAM/BAM format}, - url = {https://samtools.github.io/hts-specs/},} - </citation> - <citation type="doi">10.1093/bioinformatics/btp352</citation> - <citation type="doi">10.1093/bioinformatics/btr076</citation> - <citation type="doi">10.1093/bioinformatics/btr509</citation> - <citation type="bibtex"> - @misc{Danecek_et_al, - Author={Danecek, P., Schiffels, S., Durbin, R.}, - title={Multiallelic calling model in bcftools (-m)}, - url = {http://samtools.github.io/bcftools/call-m.pdf},} - </citation> - <citation type="bibtex"> - @misc{Durbin_VCQC, - Author={Durbin, R.}, - title={Segregation based metric for variant call QC}, - url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},} - </citation> - <citation type="bibtex"> - @misc{Li_SamMath, - Author={Li, H.}, - title={Mathematical Notes on SAMtools Algorithms}, - url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},} - </citation> - <citation type="bibtex"> - @misc{SamTools_github, - title={SAMTools GitHub page}, - url = {https://github.com/samtools/samtools},} - </citation> + <citation type="doi">10.1093/gigascience/giab008</citation> </citations> </xml> <xml name="version_command">
--- a/samtools_markdup.xml Fri Apr 07 21:46:53 2023 +0000 +++ b/samtools_markdup.xml Sun Sep 08 03:22:51 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@" > +<tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" > <description>marks duplicate alignments</description> <macros> <import>macros.xml</import> @@ -51,7 +51,7 @@ '$output' ]]></command> <inputs> - <param name="bamfile" type="data" format="sam,bam,cram" optional="false" label="Alignment" /> + <param name="bamfile" type="data" format="sam,unsorted.bam,cram" optional="false" label="Alignment" /> <param name="remove" type="boolean" argument="-r" truevalue="-r" falsevalue="" label="Remove duplicate reads" /> <param name="supp" type="boolean" argument="-S" truevalue="-S" falsevalue="" label="Mark supplementary reads of duplicates as duplicates" /> <param name="existing_tags" type="boolean" argument="-c" truevalue="-c" falsevalue="" label="Clear previous duplicate settings and tags." /> @@ -108,9 +108,12 @@ </test> <!-- 3) --> <test expect_num_outputs="1"> - <param name="bamfile" value="3_mark_supp_dup.bam" ftype="bam" /> + <param name="bamfile" value="3_mark_supp_dup.bam" ftype="bam" /> <param name="supp" value="-S" /> <output name="output" file="3_mark_supp_dup.expected.bam" ftype="bam" lines_diff="4" /> + <assert_command> + <has_text text="samtools sort" negate="true"/> + </assert_command> </test> <!-- 4) test stats output --> <test expect_num_outputs="2"> @@ -120,17 +123,12 @@ <output name="stats_output" file="stats.txt" lines_diff="2" /> </test> <!-- 5) check that stderr is not swallowed w test data from fixmate --> - <test expect_num_outputs="2" expect_exit_code="1" expect_failure="true"> + <test expect_exit_code="1" expect_failure="true"> <param name="bamfile" value="3_two_read_mapped.sam" ftype="sam" /> <param name="stats" value="yes"/> - <!-- for some reason this is not possible at the moment - <output name="stats_output"> - <assert_contents> - <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/> - </assert_contents> - </output> --> <assert_stderr> - <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/> + <has_line line="samtools markdup: error, no MC tag. Please run samtools fixmate on file first."/> + <has_line line="samtools markdup: error, unable to assign pair hash key."/> </assert_stderr> </test> <!-- 6) check optical distance and check -c option --> @@ -158,12 +156,27 @@ <param name="select_oformat" value="SAM" /> <output name="output" file="9_markdup.expected.sam" ftype="sam" lines_diff="4" /> </test> - <!-- 10) test cram format --> + <!-- 10) essentially the same as test 9 (just converted input to sorted bam .. but telling Galaxy its qname sorted) + to test qname sorted bam format and ensure that sorting happens in the tool + ie. the qname_sorted bam is not converted implicitly --> + <test expect_num_outputs="1"> + <param name="bamfile" value="1_markdup.qname_sorted.bam" ftype="qname_sorted.bam" /> + <param name="select_oformat" value="SAM" /> + <output name="output" file="9_markdup.expected.sam" lines_diff="4" /> + <assert_command> + <has_text text="samtools sort"/> + </assert_command> + </test> + + <!-- 11) test cram format --> <test expect_num_outputs="1"> <param name="bamfile" value="10_markdup.sam" ftype="sam"/> <param name="select_oformat" value="CRAM" /> <param name="ref_file" value="test.fa" /> - <output name="output" file="10_markdup.expected.cram" ftype="cram" compare="sim_size" delta="250"/> + <output name="output" file="11_markdup.expected.cram" ftype="cram" compare="sim_size" delta="250"/> + <assert_command> + <has_text text="samtools sort"/> + </assert_command> </test> </tests> <help>
--- a/test-data/9_markdup.expected.sam Fri Apr 07 21:46:53 2023 +0000 +++ b/test-data/9_markdup.expected.sam Sun Sep 08 03:22:51 2024 +0000 @@ -1,7 +1,7 @@ @HD VN:1.4 SO:coordinate @SQ SN:contig_000000000 LN:11391 -@PG ID:samtools PN:samtools VN:1.12 CL:samtools sort -@ 0 -m 768M -T . -O sam -o coordsort.sam /tmp/tmpuzvw6tdp/files/b/a/d/dataset_badf390c-6dae-4603-937a-f000db10e1eb.dat -@PG ID:samtools.1 PN:samtools PP:samtools VN:1.12 CL:samtools markdup -@ 0 -m t -O SAM coordsort.sam /tmp/tmpuzvw6tdp/files/c/4/1/dataset_c4121439-d0e4-4e9a-b597-951d44bf57bc.dat +@PG ID:samtools PN:samtools VN:1.20 CL:samtools sort -@ 0 -m 768M -T /tmp/tmpo1hsz7dc/tmp -O sam -o coordsort.sam /tmp/tmpo1hsz7dc/files/1/a/f/dataset_1afa2f60-62b8-4548-bb89-e30d23322a3e.dat +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.20 CL:samtools markdup -@ 0 -m t -O SAM coordsort.sam /tmp/tmpo1hsz7dc/job_working_directory/000/20/outputs/dataset_5a651845-a763-4ddc-ac05-3734afca2130.dat entry1 1187 contig_000000000 109 60 250M = 137 278 GATTGATATTTATTTATTATTTTATTATGTTTATTTCTTTATTTATTATCATTATTATTATTATTCTTATTATTGTTATATAAAAACATCGTAAACACAGTAAACGATAGTACTAATACTACTACTAATAAAGATAGATTTTTTTATATATATATATGTATGATCTTTTAACGTTACTTATTCAAATGCTATGTCATTTTGTAATATTTGTCATGGCAAGTATCAAACTGCTTCGGTTCTCATTGATTAG 1111>DD3DFFF3B333B3FBG3D3A33BG3D3F3333AFG3DF3D33B22D22222222D2B2A2ADE2AA2DAG222BD22D11//11//00B110ABB2FD1?>/>A2@2@2@F2@F21GB11FDDF21111111B2B11>/?1FB22>>>22>B2BG22B12B>F>11/0<0/2B2222B2@G11>22@22@G222201?1??1<?DF1FG1001<1>1F1=11>111>1-....000=0000=00 NM:i:18 AS:i:164 XS:i:0 MQ:i:60 MC:Z:250M ms:i:6744 entry2 163 contig_000000000 109 60 250M = 137 278 TATTGATATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATACAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCATGTATCAAACTGCCTGGGTTATCATTGATTAG BBBBB5F5DFFFGGGFGGGGGGHHHHHHHHHGHHGHHHHHHHHHHHHGGHHHHHHHEHHGHHGHHFHHHHHGHHHHHFHHHD5A33FBGBFHGGGFGGEGGGBGGAEGAEHH5GD5FEGFD5GGFGHFHE4GHGGHHHHHHHDEEA?FGHHGHGHFBEFFHEGHH4GGHHCFFFHHFHHHHHHBFHG1FFBFF01BGHHHFCFF@1GHHBGGFHFF1?1?FBGGF11FD110FG.<FFGGH1FGH0DG00 NM:i:2 AS:i:240 XS:i:0 MQ:i:60 MC:Z:250M ms:i:9279 entry3 99 contig_000000000 116 60 250M = 222 356 ATTTATTTATTATTTTATTATGTTTATTTATTTATTTATTATCATTATTATTATTATTATTATTATTGTTATATAAAAACATAGTAAACACAGTAAACGATAGTAGTAATACTACTACTAATAAATATATATTTTTTTATATATATATATGTATGTTCTTTTAATGTTAATTTTTCAAATGCTTTGGCATTTTGTAACATTTGTCATGCCAAGTATCAAACTGCCTGGGTTATCATTGATTAGGTACTAG BBBBBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHGGHGGHGHHHHHGHHHHHHHHHFHHHGHHGHHHHGHHHGGHHHHHHHHHHHHHGHHHHHGHHHHIHHHIIHHHHHHHGHGHHHGHHHHGHHHGHHGHHHHGHHHHHHGHBFFHHHHHHHGHHHGHEHHHFFGGHHHHHHHEHHHFH=GHHHF NM:i:0 AS:i:250 XS:i:0 MQ:i:60 MC:Z:250M ms:i:9509
--- a/test-data/stats.txt Fri Apr 07 21:46:53 2023 +0000 +++ b/test-data/stats.txt Sun Sep 08 03:22:51 2024 +0000 @@ -1,4 +1,4 @@ -COMMAND: samtools markdup -@ 0 -s coordsort.sam /tmp/tmpea8lprdd/files/2/2/4/dataset_224df993-f6bf-4c7d-99a1-2e9dd51c2a57.dat +COMMAND: samtools markdup -@ 0 -m t -s -f /tmp/tmpo1hsz7dc/job_working_directory/000/8/outputs/dataset_9e69bd59-3165-4ede-92bf-2f438627a6a1.dat -O BAM coordsort.sam /tmp/tmpo1hsz7dc/job_working_directory/000/8/outputs/dataset_ccf59cfe-ffbd-41d5-9412-d7a7ac6e65d9.dat READ: 16 WRITTEN: 16 EXCLUDED: 3 @@ -14,3 +14,4 @@ DUPLICATE PRIMARY TOTAL: 6 DUPLICATE TOTAL: 6 ESTIMATED_LIBRARY_SIZE: 4 +
