annotate dapars.py @ 17:917a2f7ab841 draft

planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
author mvdbeek
date Fri, 30 Oct 2015 10:35:17 -0400
parents f8bb40b2ff31
children ed151db39c7e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
1 import argparse
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
2 import os
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
3 import csv
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
4 import numpy as np
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
5 from scipy import stats
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
6 from collections import OrderedDict, namedtuple
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
7 import filter_utr
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
8 import subprocess
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
9 from multiprocessing import Pool
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
10 import warnings
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
11 import matplotlib.pyplot as plt
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
12 import matplotlib.gridspec as gridspec
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
13 from tabulate import tabulate
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
14 import statsmodels.sandbox.stats.multicomp as mc
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
15
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
16 def directory_path(str):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
17 if os.path.exists(str):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
18 return str
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
19 else:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
20 os.mkdir(str)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
21 return str
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
22
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
23 def parse_args():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
24 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
25 Returns floating point values except for input files.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
26 My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
27 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
28 parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage')
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
29 parser.add_argument("-c", "--control_alignments", nargs="+", required=True,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
30 help="Alignment files in BAM format from control condition")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
31 parser.add_argument("-t", "--treatment_alignments", nargs="+", required=True,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
32 help="Alignment files in BAM format from treatment condition")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
33 parser.add_argument("-u", "--utr_bed_file", required=True, type=file,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
34 help="Bed file describing longest 3UTR positions")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
35 parser.add_argument("-o", "--output_file", required=True, type=argparse.FileType('w'),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
36 help="file containing output")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
37 parser.add_argument("-cpu", required=False, type=int, default=1,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
38 help="Number of CPU cores to use.")
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
39 parser.add_argument("-l", "--local_minimum", action='store_true')
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
40 parser.add_argument("-s", "--search_start", required=False, type=int, default=50,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
41 help="Start search for breakpoint n nucleotides downstream of UTR start")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
42 parser.add_argument("-ct", "--coverage_threshold", required=False, type=float, default=20,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
43 help="minimum coverage in each aligment to be considered for determining breakpoints")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
44 parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
45 help="Write bedfile with coordinates of breakpoint positions to supplied path.")
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
46 parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.3')
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
47 parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path,
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
48 help="If plot_path is specified will write a coverage plot for every UTR in that directory.")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
49 parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'),
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
50 help="Write an html file to the specified location. Only to be used within a galaxy wrapper")
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
51 return parser.parse_args()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
52
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
53
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
54 class UtrFinder():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
55 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
56 This seems to be the main caller.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
57 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
58
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
59 def __init__(self, args):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
60 self.control_alignments = [file for file in args.control_alignments]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
61 self.treatment_alignments = [file for file in args.treatment_alignments]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
62 self.n_cpus = args.cpu
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
63 self.search_start = args.search_start
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
64 self.coverage_threshold = args.coverage_threshold
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
65 self.local_minimum = args.local_minimum
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
66 self.plot_path = args.plot_path
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
67 self.html_file = args.html_file
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
68 self.utr = args.utr_bed_file
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
69 self.gtf_fields = filter_utr.get_gtf_fields()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
70 self.result_file = args.output_file
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
71 self.all_alignments = self.control_alignments + self.treatment_alignments
13
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
72 self.alignment_names = OrderedDict(( file, os.path.basename(file)) for file in self.all_alignments )
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
73 self.num_samples = len(self.all_alignments)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
74 self.utr_dict = self.get_utr_dict(0.2)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
75 self.dump_utr_dict_to_bedfile()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
76 print "Established dictionary of 3\'UTRs"
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
77 self.coverage_files = self.run_bedtools_coverage()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
78 self.utr_coverages = self.read_coverage_result()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
79 print "Established dictionary of 3\'UTR coverages"
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
80 self.coverage_weights = self.get_coverage_weights()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
81 self.result_tuple = self.get_result_tuple()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
82 self.result_d = self.calculate_apa_ratios()
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
83 self.results = self.order_by_p()
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
84 self.write_results()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
85 if args.breakpoint_bed:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
86 self.bed_output = args.breakpoint_bed
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
87 self.write_bed()
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
88 if self.plot_path:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
89 self.write_html()
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
90
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
91 def order_by_p(self):
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
92 results = [result for result in self.result_d.itervalues()]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
93 p_values = np.array([ result.p_value for result in self.result_d.itervalues() ])
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
94 adj_p_values = mc.fdrcorrection0(p_values, 0.05)[1]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
95 sort_index = np.argsort(adj_p_values)
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
96 results = [ results[i]._replace(adj_p_value=adj_p_values[i]) for i in sort_index ]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
97 return results
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
98
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
99
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
100 def dump_utr_dict_to_bedfile(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
101 w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
102 for gene, utr in self.utr_dict.iteritems():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
103 w.writerow([utr["chr"], utr["new_start"]-1, utr["new_end"], gene, ".", utr["strand"]])
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
104
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
105 def run_bedtools_coverage(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
106 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
107 Use bedtools coverage to generate pileup data for all alignment files for the regions specified in utr_dict.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
108 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
109 cmds = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
110 for alignment_file in self.all_alignments:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
111 cmd = "sort -k1,1 -k2,2n tmp_bedfile.bed | "
16
f8bb40b2ff31 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 15
diff changeset
112 cmd = cmd + "bedtools coverage -d -s -abam {alignment_file} -b stdin |" \
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
113 " cut -f 4,7,8 > coverage_file_{alignment_name}".format(
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
114 alignment_file = alignment_file, alignment_name= self.alignment_names[alignment_file] )
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
115 cmds.append(cmd)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
116 subprocesses = [subprocess.Popen([cmd], shell=True) for cmd in cmds]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
117 [p.wait() for p in subprocesses]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
118 coverage_files = ["gene_position_coverage_{alignment_name}".format(
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
119 alignment_name = self.alignment_names[alignment_file]) for alignment_file in self.all_alignments ]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
120 return coverage_files
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
121
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
122 def read_coverage_result(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
123 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
124 Read coverages back in and store as dictionary of numpy arrays
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
125 """
13
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
126 coverage_dict = { gene: [np.zeros(utr_d["new_end"]+1-utr_d["new_start"]) for name in self.alignment_names.values() ] for gene, utr_d in self.utr_dict.items() }
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
127 for i, alignment_name in enumerate(self.alignment_names.values()):
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
128 with open("coverage_file_{alignment_name}".format(alignment_name = alignment_name)) as coverage_file:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
129 for line in coverage_file:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
130 gene, position, coverage= line.strip().split("\t")
13
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
131 coverage_dict[gene][i][int(position)-1] = coverage
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
132 for utr_d in self.utr_dict.values():
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
133 if utr_d["strand"] == "-":
13
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
134 for i, alignment_name in enumerate(self.alignment_names.values()):
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
135 coverage_dict[gene][i] = coverage_dict[gene][i][::-1]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
136 return coverage_dict
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
137
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
138 def get_utr_dict(self, shift):
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
139 """
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
140 The utr end is extended by UTR length * shift, to discover novel distal polyA sites.
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
141 Set to 0 to disable.
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
142 """
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
143 utr_dict = OrderedDict()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
144 for line in self.utr:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
145 if not line.startswith("#"):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
146 filter_utr.get_feature_dict( line=line, gtf_fields=self.gtf_fields, utr_dict=utr_dict, feature="UTR" )
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
147 gene, utr_d = utr_dict.popitem()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
148 utr_d = utr_d[0]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
149 end_shift = int(round(abs(utr_d["start"] - utr_d["end"]) * shift))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
150 if utr_d["strand"] == "+":
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
151 utr_d["new_end"] = utr_d["end"] - end_shift
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
152 utr_d["new_start"] = utr_d["start"]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
153 else:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
154 utr_d["new_end"] = utr_d["end"]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
155 utr_d["new_start"] = utr_d["start"] + end_shift
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
156 if utr_d["new_start"] + 50 < utr_d["new_end"]:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
157 utr_dict[gene] = utr_d
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
158 return utr_dict
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
159
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
160 def get_coverage_weights(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
161 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
162 Return weights for normalizing coverage.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
163 utr_coverage is still confusing.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
164 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
165 coverage_per_alignment = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
166 for utr in self.utr_coverages.itervalues(): # TODO: be smarter about this.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
167 utr_coverage = []
13
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
168 for vector in utr:
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
169 utr_coverage.append(np.sum(vector))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
170 coverage_per_alignment.append(utr_coverage)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
171 coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ])
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
172 coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? Or even no normalization!
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
173 return coverage_weights
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
174
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
175 def get_result_tuple(self):
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
176 static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "adj_p_value", "breakpoint",
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
177 "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
178 samples_desc = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
179 for statistic in ["coverage_long", "coverage_short", "percent_long"]:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
180 for i, sample in enumerate(self.control_alignments):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
181 samples_desc.append("control_{i}_{statistic}".format(i=i, statistic = statistic))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
182 for i, sample in enumerate(self.treatment_alignments):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
183 samples_desc.append("treatment_{i}_{statistic}".format(i=i, statistic = statistic))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
184 return namedtuple("result", static_desc + samples_desc)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
185
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
186 def calculate_apa_ratios(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
187 result_d = OrderedDict()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
188 arg_d = {"result_tuple": self.result_tuple,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
189 "coverage_weights":self.coverage_weights,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
190 "num_samples":self.num_samples,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
191 "num_control":len(self.control_alignments),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
192 "num_treatment":len(self.treatment_alignments),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
193 "result_d":result_d}
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
194 pool = Pool(self.n_cpus)
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
195 tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments),
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
196 len(self.treatment_alignments), self.search_start, self.local_minimum, self.coverage_threshold) \
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
197 for utr, utr_d in self.utr_dict.iteritems() ]
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
198 #processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
199 #result_list = [res.get() for res in processed_tasks]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
200 result_list = [calculate_all_utr(*t) for t in tasks] # uncomment for easier debugging
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
201 for res_control, res_treatment in result_list:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
202 if not res_control:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
203 continue
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
204 for i, result in enumerate(res_control):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
205 if isinstance(result, dict):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
206 t = self.result_tuple(**result)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
207 result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
208 for i, result in enumerate(res_treatment):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
209 if isinstance(result, dict):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
210 t = self.result_tuple(**result)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
211 result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
212 return result_d
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
213
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
214 def write_results(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
215 w = csv.writer(self.result_file, delimiter='\t')
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
216 header = list(self.result_tuple._fields)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
217 header[0] = "#chr"
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
218 w.writerow(header) # field header
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
219 w.writerows( self.results)
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
220
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
221 def write_html(self):
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
222 output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.results]
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
223 if self.html_file:
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
224 self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value", "adj_p_value"], tablefmt="html"))
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
225 else:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
226 with open(os.path.join(self.plot_path, "index.html"), "w") as html_file:
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
227 html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value", "adj_p_value"], tablefmt="html"))
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
228 html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value", "adj_p_value"], tablefmt="html"))
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
229
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
230 def write_bed(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
231 w = csv.writer(self.bed_output, delimiter='\t')
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
232 bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.results]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
233 w.writerows(bed)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
234
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
235
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
236 def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, local_minimum, coverage_threshold):
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
237 if utr_d["strand"] == "+":
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
238 is_reverse = False
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
239 else:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
240 is_reverse = True
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
241 control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
242 optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, local_minimum, coverage_threshold, num_control)
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
243 res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse,
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
244 num_control, num_treatment)
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
245 res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse,
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
246 num_control, num_treatment)
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
247 return res_control, res_treatment
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
248
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
249
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
250 def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type,
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
251 abundances, is_reverse, num_control, num_treatment):
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
252 """
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
253 Takes in a result dictionary res and fills the necessary fields
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
254 """
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
255 if not breakpoints:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
256 return False
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
257 result = []
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
258 for breakpoint, abundance in zip(breakpoints, abundances):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
259 res = {}
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
260 res["adj_p_value"] = "NA"
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
261 long_coverage_vector, short_coverage_vector = abundance
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
262 percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector)
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
263 for i in range(num_control):
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
264 res["control_{i}_coverage_long".format(i=i)] = long_coverage_vector[i]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
265 res["control_{i}_coverage_short".format(i=i)] = short_coverage_vector[i]
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
266 res["control_{i}_percent_long".format(i=i)] = percentage_long[i]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
267 for k in range(num_treatment):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
268 i = k + num_control
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
269 res["treatment_{i}_coverage_long".format(i=k)] = long_coverage_vector[i]
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
270 res["treatment_{i}_coverage_short".format(i=k)] = short_coverage_vector[i]
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
271 res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i]
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
272 res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:])
14
ea5d573d6d40 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 13
diff changeset
273 control_mean_percent = np.mean(percentage_long[:num_control])
ea5d573d6d40 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 13
diff changeset
274 treatment_mean_percent = np.mean(percentage_long[num_control:])
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
275 res["chr"] = utr_d["chr"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
276 res["start"] = utr_d["start"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
277 res["end"] = utr_d["end"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
278 res["strand"] = utr_d["strand"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
279 if is_reverse:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
280 breakpoint = utr_d["new_end"] - breakpoint
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
281 else:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
282 breakpoint = utr_d["new_start"] + breakpoint
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
283 res["breakpoint"] = breakpoint
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
284 res["breakpoint_type"] = breakpoint_type
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
285 res["control_mean_percent"] = control_mean_percent
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
286 res["treatment_mean_percent"] = treatment_mean_percent
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
287 res["gene"] = utr
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
288 result.append(res)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
289 return result
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
290
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
291
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
292 def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, local_minimum, coverage_threshold, num_control):
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
293 """
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
294 We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
295 at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample.
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
296 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
297 num_samples = len(utr_coverage)
13
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
298 normalized_utr_coverage = np.array(utr_coverage)/np.expand_dims(coverage_weigths, axis=1)
538c4e2b423e planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents: 5
diff changeset
299 start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage] # filters threshold on mean coverage over first 100 nt
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
300 is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why?
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
301 is_above_length = UTR_end - UTR_start >= 150
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
302 if (is_above_threshold) and (is_above_length):
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
303 search_end = UTR_end - UTR_start
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
304 breakpoints = range(search_start, search_end + 1)
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
305 mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ]
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
306 mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
307 if plot_path:
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
308 plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control)
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
309 if len(mse_list) > 0:
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
310 return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples, local_minimum)
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
311 return False, False, False, False
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
312
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
313
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
314 def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control):
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
315 """
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
316
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
317 """
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
318 fig = plt.figure(figsize=(8, 8))
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
319 gs = gridspec.GridSpec(2, 1)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
320 ax1 = plt.subplot(gs[0, :])
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
321 ax2 = plt.subplot(gs[1, :])
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
322 ax1.set_title("mean-squared error plot")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
323 ax1.set_ylabel("mean-squared error")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
324 ax1.set_xlabel("nt after UTR start")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
325 ax2.set_title("coverage plot")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
326 ax2.set_xlabel("nt after UTR start")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
327 ax2.set_ylabel("normalized nucleotide coverage")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
328 mse_control = [ condition[0] for condition in mse_list]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
329 mse_treatment = [ condition[1] for condition in mse_list]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
330 minima_control = get_minima(np.array(mse_control))
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
331 minima_treatment = get_minima(np.array(mse_treatment))
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
332 control = normalized_utr_coverage[:num_control]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
333 treatment = normalized_utr_coverage[num_control:]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
334 ax1.plot(mse_control, "b-")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
335 ax1.plot(mse_treatment, "r-")
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
336 [ax2.plot(cov, "b-") for cov in control]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
337 [ax2.plot(cov, "r-") for cov in treatment]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
338 [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
339 ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
340 [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
341 ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
342 fig.add_subplot(ax1)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
343 fig.add_subplot(ax2)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
344 gs.tight_layout(fig)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
345 fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr)))
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
346
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
347
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
348 def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples, local_minimum):
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
349 """
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
350 Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
351 in mse_list
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
352 """
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
353 mse_control = np.array([mse[0] for mse in mse_list])
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
354 mse_treatment = np.array([mse[1] for mse in mse_list])
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
355 control_breakpoints = list(get_minima(mse_control, local_minimum))
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
356 treatment_breakpoints = list(get_minima(mse_treatment, local_minimum))
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
357 control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
358 treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints]
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
359 return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
360
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
361 def get_minima(a, local_minimum=False):
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
362 """
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
363 get minima for numpy array a. If local is false, only return absolute minimum
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
364 """
17
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
365 if not local_minimum:
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
366 return np.where(a == a.min())
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
367 else:
917a2f7ab841 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 16
diff changeset
368 return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
369
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
370 def estimate_mse(cov, bp, num_samples, num_control):
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
371 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
372 get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
373 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
374 with warnings.catch_warnings():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
375 warnings.simplefilter("ignore", category=RuntimeWarning)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
376 long_utr_vector = cov[:num_samples, bp:]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
377 short_utr_vector = cov[:num_samples, 0:bp]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
378 mean_long_utr = np.mean(long_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
379 mean_short_utr = np.mean(short_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
380 square_mean_centered_short_utr_vector = (short_utr_vector[:num_samples] - mean_short_utr[:, np.newaxis] )**2
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
381 square_mean_centered_long_utr_vector = (long_utr_vector[:num_samples] - mean_long_utr[:, np.newaxis])**2
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
382 mse_control = np.mean(np.append(square_mean_centered_long_utr_vector[:num_control], square_mean_centered_short_utr_vector[:num_control]))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
383 mse_treatment = np.mean(np.append(square_mean_centered_long_utr_vector[num_control:], square_mean_centered_short_utr_vector[num_control:]))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
384 return mse_control, mse_treatment
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
385
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
386
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
387 def estimate_abundance(cov, bp, num_samples):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
388 with warnings.catch_warnings():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
389 warnings.simplefilter("ignore", category=RuntimeWarning)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
390 long_utr_vector = cov[:num_samples, bp:]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
391 short_utr_vector = cov[:num_samples, 0:bp]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
392 mean_long_utr = np.mean(long_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
393 mean_short_utr = np.mean(short_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
394 return mean_long_utr, mean_short_utr
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
395
5
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
396 def stat_test(a,b):
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
397 return stats.ttest_ind(a,b)
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
398
a5d8b08af089 planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents: 4
diff changeset
399 def gene_str_to_link(str):
14
ea5d573d6d40 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 13
diff changeset
400 return "<a href=\"{str}.svg\" type=\"image/svg+xml\">{str}</a>".format(str=str)
ea5d573d6d40 planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents: 13
diff changeset
401
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
402
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
403 if __name__ == '__main__':
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
404 args = parse_args()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
405 find_utr = UtrFinder(args)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
406