Mercurial > repos > peterjc > count_roi_variants
changeset 14:1446811c3fe8 draft
planemo upload for repository https://github.com/peterjc/pico_galaxy/tree/master/tools/count_roi_variants commit d67596914a7bbe183851437eaafe8c7305877e5a-dirty
author | peterjc |
---|---|
date | Fri, 22 Feb 2019 10:11:49 -0500 |
parents | 4554d79f238b |
children | 4f71065fcf0f |
files | tools/count_roi_variants/count_roi_variants.py tools/count_roi_variants/tool_dependencies.xml |
diffstat | 2 files changed, 142 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/count_roi_variants/count_roi_variants.py Fri Nov 09 10:46:50 2018 -0500 +++ b/tools/count_roi_variants/count_roi_variants.py Fri Feb 22 10:11:49 2019 -0500 @@ -99,9 +99,17 @@ return answer -assert decode_cigar("14S15M1P1D3P54M1D34M5S") == [(14, 'S'), (15, 'M'), (1, 'P'), - (1, 'D'), (3, 'P'), (54, 'M'), - (1, 'D'), (34, 'M'), (5, 'S')] +assert decode_cigar("14S15M1P1D3P54M1D34M5S") == [ + (14, "S"), + (15, "M"), + (1, "P"), + (1, "D"), + (3, "P"), + (54, "M"), + (1, "D"), + (34, "M"), + (5, "S"), +] def align_len(cigar_ops): @@ -115,14 +123,14 @@ seq_offset = 0 for count, op in cigar_ops: if op in "MX=": - for (i, base) in enumerate(seq[seq_offset:seq_offset + count]): + for (i, base) in enumerate(seq[seq_offset : seq_offset + count]): yield ref_offset + i, base ref_offset += count seq_offset += count elif op == "I": # Give them all an in-between reference position # (Python lets us mix integers and floats, wouldn't work in C) - for (i, base) in enumerate(seq[seq_offset:seq_offset + count]): + for (i, base) in enumerate(seq[seq_offset : seq_offset + count]): yield ref_offset - 0.5, base # Does not change ref_offset seq_offset += count @@ -143,41 +151,105 @@ raise NotImplementedError("Unexpected CIGAR operator %s" % op) -assert list(expand_cigar("ACGT", decode_cigar("4M"))) == \ - [(0, "A"), (1, "C"), (2, "G"), (3, "T")] -assert list(expand_cigar("ACGT", decode_cigar("2=1X1="))) == \ - [(0, "A"), (1, "C"), (2, "G"), (3, "T")] -assert list(expand_cigar("ACGT", decode_cigar("2M1D2M"))) == \ - [(0, "A"), (1, "C"), (3, "G"), (4, "T")] -assert list(expand_cigar("ACtGT", decode_cigar("2M1I2M"))) == \ - [(0, "A"), (1, "C"), (1.5, "t"), (2, "G"), (3, "T")] -assert list(expand_cigar("tACGT", decode_cigar("1I4M"))) == \ - [(-0.5, 't'), (0, 'A'), (1, 'C'), (2, 'G'), (3, 'T')] -assert list(expand_cigar("ACGTt", decode_cigar("4M1I"))) == \ - [(0, 'A'), (1, 'C'), (2, 'G'), (3, 'T'), (3.5, 't')] -assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == \ - [(0, 'A'), (1, 'A'), (2, 'A'), - (3, 'A'), (4, 'G'), (5, 'G'), - (6, 'G'), (7, 'G'), (8, 'T'), - (9, 'T'), (10, 'T'), (11, 'T')] -assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == \ - [(0, 'A'), (1, 'A'), (2, 'A'), - (3, 'A'), (3.5, 'c'), (4, 'G'), - (5, 'G'), (6, 'G'), (7, 'G'), - (8, 'T'), (9, 'T'), (10, 'T'), - (11, 'T')] -assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == \ - [(0, 'A'), (1, 'A'), (2, 'A'), - (3, 'A'), (4, 'G'), (5, 'G'), - (6, 'G'), (7, 'G'), (7.5, "c"), - (8, 'T'), (9, 'T'), (10, 'T'), - (11, 'T')] -assert list(expand_cigar("AAAAcGGGGcTTTT", decode_cigar("4M1I4M1I4M"))) == \ - [(0, 'A'), (1, 'A'), (2, 'A'), - (3, 'A'), (3.5, 'c'), (4, 'G'), - (5, 'G'), (6, 'G'), (7, 'G'), - (7.5, 'c'), (8, 'T'), (9, 'T'), - (10, 'T'), (11, 'T')] +assert list(expand_cigar("ACGT", decode_cigar("4M"))) == [ + (0, "A"), + (1, "C"), + (2, "G"), + (3, "T"), +] +assert list(expand_cigar("ACGT", decode_cigar("2=1X1="))) == [ + (0, "A"), + (1, "C"), + (2, "G"), + (3, "T"), +] +assert list(expand_cigar("ACGT", decode_cigar("2M1D2M"))) == [ + (0, "A"), + (1, "C"), + (3, "G"), + (4, "T"), +] +assert list(expand_cigar("ACtGT", decode_cigar("2M1I2M"))) == [ + (0, "A"), + (1, "C"), + (1.5, "t"), + (2, "G"), + (3, "T"), +] +assert list(expand_cigar("tACGT", decode_cigar("1I4M"))) == [ + (-0.5, "t"), + (0, "A"), + (1, "C"), + (2, "G"), + (3, "T"), +] +assert list(expand_cigar("ACGTt", decode_cigar("4M1I"))) == [ + (0, "A"), + (1, "C"), + (2, "G"), + (3, "T"), + (3.5, "t"), +] +assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [ + (0, "A"), + (1, "A"), + (2, "A"), + (3, "A"), + (4, "G"), + (5, "G"), + (6, "G"), + (7, "G"), + (8, "T"), + (9, "T"), + (10, "T"), + (11, "T"), +] +assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == [ + (0, "A"), + (1, "A"), + (2, "A"), + (3, "A"), + (3.5, "c"), + (4, "G"), + (5, "G"), + (6, "G"), + (7, "G"), + (8, "T"), + (9, "T"), + (10, "T"), + (11, "T"), +] +assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == [ + (0, "A"), + (1, "A"), + (2, "A"), + (3, "A"), + (4, "G"), + (5, "G"), + (6, "G"), + (7, "G"), + (7.5, "c"), + (8, "T"), + (9, "T"), + (10, "T"), + (11, "T"), +] +assert list(expand_cigar("AAAAcGGGGcTTTT", decode_cigar("4M1I4M1I4M"))) == [ + (0, "A"), + (1, "A"), + (2, "A"), + (3, "A"), + (3.5, "c"), + (4, "G"), + (5, "G"), + (6, "G"), + (7, "G"), + (7.5, "c"), + (8, "T"), + (9, "T"), + (10, "T"), + (11, "T"), +] def get_roi(seq, cigar_ops, start, end): @@ -195,7 +267,9 @@ return seq[start:end] # Would use "start <= i < end" if they were all integers, but # want to exclude e.g. 3.5 and 7.5 when given start 4 and end 8. - return "".join(base for i, base in expand_cigar(seq, cigar_ops) if start <= i <= end - 1) + return "".join( + base for i, base in expand_cigar(seq, cigar_ops) if start <= i <= end - 1 + ) assert "GGGG" == get_roi("AAAAGGGGTTTT", decode_cigar("12M"), 4, 8) @@ -218,12 +292,27 @@ # Call samtools view, don't need header so no -h added. # Only want mapped reads, thus flag filter -F 4. - child = subprocess.Popen(["samtools", "view", "-F", "4", bam_file, region], - universal_newlines=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + child = subprocess.Popen( + ["samtools", "view", "-F", "4", bam_file, region], + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) for line in child.stdout: assert line[0] != "@", "Got unexpected SAM header line: %s" % line - qname, flag, rname, pos, mapq, cigar, rnext, pnext, tlen, seq, rest = line.split("\t", 10) + ( + qname, + flag, + rname, + pos, + mapq, + cigar, + rnext, + pnext, + tlen, + seq, + rest, + ) = line.split("\t", 10) pos = int(pos) # one-based if start < pos: # Does not span the ROI @@ -247,9 +336,11 @@ if return_code: sys.exit("Got return code %i from samtools view" % return_code) elif "specifies an unknown reference name. Continue anyway." in stderr: - sys.exit(stderr.strip() + - "\n\nERROR: samtools did not recognise the region requested, " - "can't count any variants.") + sys.exit( + stderr.strip() + + "\n\nERROR: samtools did not recognise the region requested, " + "can't count any variants." + ) return tally
--- a/tools/count_roi_variants/tool_dependencies.xml Fri Nov 09 10:46:50 2018 -0500 +++ b/tools/count_roi_variants/tool_dependencies.xml Fri Feb 22 10:11:49 2019 -0500 @@ -1,6 +1,6 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> <package name="samtools" version="1.2"> - <repository changeset_revision="5b7172f9b230" name="package_samtools_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="5b7172f9b230" name="package_samtools_1_2" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file