Mercurial > repos > drosofff > lumpy
annotate pairend_distro.py @ 13:62ab5284ae6b draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit 8ea053586c663715db915668ebe676a461c23e85
author | drosofff |
---|---|
date | Wed, 18 Jan 2017 06:51:48 -0500 |
parents | bbdc52775678 |
children |
rev | line source |
---|---|
0
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/env python |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
2 # (c) 2012 - Ryan M. Layer |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
3 # Hall Laboratory |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
4 # Quinlan Laboratory |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
5 # Department of Computer Science |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
6 # Department of Biochemistry and Molecular Genetics |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
7 # Department of Public Health Sciences and Center for Public Health Genomics, |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
8 # University of Virginia |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
9 # rl6sf@virginia.edu |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
10 |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
11 import sys |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
12 import numpy as np |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
13 from operator import itemgetter |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
14 from optparse import OptionParser |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
15 |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
# some constants for sam/bam field ids
# (0-based column indexes into a tab-split SAM alignment line)
SAM_FLAG = 1
SAM_REFNAME = 2
SAM_MATE_REFNAME = 6
SAM_ISIZE = 8

# Command-line interface.  The alignment records themselves are not named on
# the command line: the script consumes them from standard input.
parser = OptionParser(
    description="Read SAM alignment records from standard input and write "
                "an insert-size histogram to the output file.")

parser.add_option("-r",
                  "--read_length",
                  type="int",
                  dest="read_length",
                  help="Read length")

parser.add_option("-X",
                  dest="X",
                  type="int",
                  help="Number of stdevs from mean to extend")

parser.add_option("-N",
                  dest="N",
                  type="int",
                  help="Number to sample")

parser.add_option("-o",
                  dest="output_file",
                  help="Output file")

parser.add_option("-m",
                  dest="mads",
                  type="int",
                  default=10,
                  help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)")
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
49 |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
def unscaled_upper_mad(xs):
    """Return a tuple consisting of the median of xs followed by the
    unscaled median absolute deviation of the values in xs that lie
    above the median.

    xs may be a numpy array or any sequence numpy can convert to one
    (list, tuple, ...).  Only values strictly greater than the median
    contribute to the deviation.  When no value lies above the median
    (for example when all values are equal) numpy takes the median of
    an empty selection, so the second element is NaN.
    """
    # Boolean-mask indexing below needs an ndarray; a plain list would fail.
    xs = np.asarray(xs)
    med = np.median(xs)
    return med, np.median(xs[xs > med] - med)
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
57 |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
58 |
bbdc52775678
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
drosofff
parents:
diff
changeset
|
(options, args) = parser.parse_args()

# -r, -X, -N and -o are mandatory.  The truthiness test also rejects 0.
if not options.read_length:
    parser.error('Read length not given')

if not options.X:
    parser.error('X not given')

if not options.N:
    parser.error('N not given')

if not options.output_file:
    parser.error('Output file not given')


# SAM FLAG bits that must all be set: 97 = 1 + 32 + 64
# (paired, mate on reverse strand, first in pair).
required = 97
# SAM FLAG bits that must all be clear:
# 3484 = 4 + 8 + 16 + 128 + 256 + 1024 + 2048
# (unmapped, mate unmapped, read on reverse strand, second in pair,
#  secondary, duplicate, supplementary).
restricted = 3484
flag_mask = required | restricted

L = []
c = 0

# Collect the insert sizes of at most N qualifying alignments from stdin.
for line in sys.stdin:
    if c >= options.N:
        break

    # SAM header lines start with '@' and carry no alignment fields.
    if line.startswith('@'):
        continue

    fields = line.rstrip().split('\t')
    flag = int(fields[SAM_FLAG])
    mate_refname = fields[SAM_MATE_REFNAME]
    isize = int(fields[SAM_ISIZE])

    # Keep pairs whose mate is on the same reference ("="), whose flag has
    # exactly the required bits set among the masked ones, and whose insert
    # size is non-negative.
    want = mate_refname == "=" and flag & flag_mask == required and isize >= 0
    if want:
        c += 1
        L.append(isize)

# warn if very few elements in distribution
min_elements = 1000
if len(L) < min_elements:
    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements))
    # No histogram is written in this case; only the NA summary is printed.
    mean = "NA"
    stdev = "NA"

else:
    # Remove outliers
    L = np.array(L)
    L.sort()
    med, umad = unscaled_upper_mad(L)
    upper_cutoff = med + options.mads * umad
    L = L[L < upper_cutoff]
    new_len = len(L)
    removed = c - new_len
    sys.stderr.write("Removed %d outliers with isize >= %d\n" %
                     (removed, upper_cutoff))
    c = new_len

    mean = np.mean(L)
    stdev = np.std(L)

    # Histogram range: from the read length up to mean + X standard deviations.
    start = options.read_length
    end = int(mean + options.X*stdev)

    H = [0] * (end - start + 1)
    s = 0

    for x in L:
        if start <= x <= end:
            H[int(x - start)] += 1
            s += 1

    # Without this guard the normalisation below divides by zero, after the
    # output file has already been truncated.
    if s == 0 and end > start:
        sys.exit("Error: no insert sizes between %d and %d; "
                 "cannot build histogram" % (start, end))

    # NOTE(review): H has end - start + 1 bins but only the first end - start
    # are written, so samples with isize == end count towards s without being
    # emitted.  Kept as is to leave the output format unchanged -- confirm
    # against the consumer of this file.
    with open(options.output_file, 'w') as f:
        for i in range(end - start):
            o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n"
            f.write(o)

print('mean:' + str(mean) + '\tstdev:' + str(stdev))