Mercurial > repos > peterjc > count_roi_variants
changeset 1:16defe17fda9 draft
v0.0.2 Cope with pipes in reference name (e.g. NCBI style FASTA naming)
author | peterjc |
---|---|
date | Wed, 17 Feb 2016 11:40:27 -0500 |
parents | 78898a40028b |
children | 56852ae15edd |
files | test-data/SRR639755_mito_pairs_vs_NC_010642_clc.bam test-data/SRR639755_mito_pairs_vs_NC_010642_clc.bam.bai test-data/SRR639755_mito_pairs_vs_NC_010642_clc.count-1695-1725.tabular tools/count_roi_variants/README.rst tools/count_roi_variants/count_roi_variants.py tools/count_roi_variants/count_roi_variants.xml |
diffstat | 6 files changed, 36 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SRR639755_mito_pairs_vs_NC_010642_clc.count-1695-1725.tabular Wed Feb 17 11:40:27 2016 -0500 @@ -0,0 +1,4 @@ +Variant Count Percentage +AGCCCATGAGATGGGAAGCAATGGGCTACA 14 87.50 +AGCCCATGAGATGGGAAGCAATGGGCTACG 1 6.25 +AGCGCATGAGATGGGAAGCAATGGGCTACG 1 6.25
--- a/tools/count_roi_variants/README.rst Fri Feb 12 11:17:17 2016 -0500 +++ b/tools/count_roi_variants/README.rst Wed Feb 17 11:40:27 2016 -0500 @@ -54,6 +54,7 @@ Version Changes ------- ---------------------------------------------------------------------- v0.0.1 - Initial public release +v0.0.2 - Cope with pipes in reference name (e.g. NCBI style FASTA naming) ======= ====================================================================== @@ -77,7 +78,7 @@ To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/count_roi_variants/ + $ planemo shed_build tools/count_roi_variants/ ... $ tar -tzf shed_upload.tar.gz tools/count_roi_variants/README.rst
--- a/tools/count_roi_variants/count_roi_variants.py Fri Feb 12 11:17:17 2016 -0500 +++ b/tools/count_roi_variants/count_roi_variants.py Wed Feb 17 11:40:27 2016 -0500 @@ -19,7 +19,7 @@ if "-v" in sys.argv or "--version" in sys.argv: # Galaxy seems to invert the order of the two lines - print("BAM coverage statistics v0.0.1") + print("BAM coverage statistics v0.0.2 (using samtools)") cmd = "samtools 2>&1 | grep -i ^Version" sys.exit(os.system(cmd)) @@ -128,11 +128,9 @@ assert list(expand_cigar("ACtGT", decode_cigar("2M1I2M"))) == [(0, "A"), (1, "C"), (1.5, "t"), (2, "G"), (3, "T")] assert list(expand_cigar("tACGT", decode_cigar("1I4M"))) == [(-0.5, 't'), (0, 'A'), (1, 'C'), (2, 'G'), (3, 'T')] assert list(expand_cigar("ACGTt", decode_cigar("4M1I"))) == [(0, 'A'), (1, 'C'), (2, 'G'), (3, 'T'), (3.5, 't')] -assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (4, 'G'), (5, 'G\ -'), (6, 'G'), (7, 'G'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] +assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (3.5, 'c'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] -assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (4, 'G'), (5, 'G\ -'), (6, 'G'), (7, 'G'), (7.5, "c"), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] +assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (7.5, "c"), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] assert list(expand_cigar("AAAAcGGGGcTTTT", decode_cigar("4M1I4M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (3.5, 'c'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (7.5, 'c'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] def get_roi(seq, cigar_ops, start, end): @@ -167,7 +165,7 @@ # Call samtools view, don't need header so no -h added. # Only want mapped reads, thus flag filter -F 4. - child = subprocess.Popen(["samtools", "view", "-F", "4", bam_file, region], + child = subprocess.Popen(["/mnt/galaxy/bin/samtools_1.1", "view", "-F", "4", bam_file, region], stdout=subprocess.PIPE) for line in child.stdout: assert line[0] != "@", "Got unexpected SAM header line: %s" % line
--- a/tools/count_roi_variants/count_roi_variants.xml Fri Feb 12 11:17:17 2016 -0500 +++ b/tools/count_roi_variants/count_roi_variants.xml Wed Feb 17 11:40:27 2016 -0500 @@ -1,4 +1,4 @@ -<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.1"> +<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.2"> <description>using samtools view</description> <requirements> <requirement type="binary">samtools</requirement> @@ -13,7 +13,22 @@ <command interpreter="python">count_roi_variants.py "$input_bam" "${input_bam.metadata.bam_index}" "$out_tabular" "$region"</command> <inputs> <param name="input_bam" type="data" format="bam" label="Input BAM file" /> - <param name="region" type="text" label="Region of interest" help="Use the reference:start-end syntax as in samtools." /> + <param name="region" type="text" label="Region of interest" help="Use the reference:start-end syntax as in samtools."> + <sanitizer> + <!-- + SAM/BAM spec says the name must match regex [!-)+-<>-~][!-~]* + but will focus on ASCII 33 ("!") to ASCII 126 ("~"), i.e. + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + of which some are going to be potential trouble like !, ", $, &, ', \, `, {, |, } + which is how I came to this hopefully safe set.. + --> + <valid initial="string.letters,string.digits"> + <add value="#%+,-./:;=@^_|~" /> + <remove value=" " /> + </valid> + <mapping initial="none" /> + </sanitizer> + </param> </inputs> <outputs> <data name="out_tabular" format="tabular" label="$input_bam.name $region variants" /> @@ -35,6 +50,15 @@ <has_line line="Counted 1 variants from 1 reads spanning ref:10-15" /> </assert_stdout> </test> + <!-- This test is a tricky one due to the NCBI's love of pipe characters in identifiers --> + <test> + <param name="input_bam" value="SRR639755_mito_pairs_vs_NC_010642_clc.bam" ftype="bam" /> + <param name="region" value="gi|187250362|ref|NC_010642.1|:1695-1725" /> + <output name="out_tabular" file="SRR639755_mito_pairs_vs_NC_010642_clc.count-1695-1725.tabular" ftype="tabular" /> + <assert_stdout> + <has_line line="Counted 3 variants from 16 reads spanning gi|187250362|ref|NC_010642.1|:1695-1725" /> + </assert_stdout> + </test> </tests> <help> **What it does**