Mercurial > repos > mvdbeek > dapars
annotate dapars.py @ 17:917a2f7ab841 draft
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
author | mvdbeek |
---|---|
date | Fri, 30 Oct 2015 10:35:17 -0400 |
parents | f8bb40b2ff31 |
children | ed151db39c7e |
rev | line source |
---|---|
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
1 import argparse |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
2 import os |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
3 import csv |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
4 import numpy as np |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
5 from scipy import stats |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
6 from collections import OrderedDict, namedtuple |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
7 import filter_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
8 import subprocess |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
9 from multiprocessing import Pool |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
10 import warnings |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
11 import matplotlib.pyplot as plt |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
12 import matplotlib.gridspec as gridspec |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
13 from tabulate import tabulate |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
14 import statsmodels.sandbox.stats.multicomp as mc |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
15 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
16 def directory_path(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
17 if os.path.exists(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
18 return str |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
19 else: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
20 os.mkdir(str) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
21 return str |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
22 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
23 def parse_args(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
24 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
25 Returns floating point values except for input files. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
26 My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
27 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
28 parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
29 parser.add_argument("-c", "--control_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
30 help="Alignment files in BAM format from control condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
31 parser.add_argument("-t", "--treatment_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
32 help="Alignment files in BAM format from treatment condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
33 parser.add_argument("-u", "--utr_bed_file", required=True, type=file, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
34 help="Bed file describing longest 3UTR positions") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
35 parser.add_argument("-o", "--output_file", required=True, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
36 help="file containing output") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
37 parser.add_argument("-cpu", required=False, type=int, default=1, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
38 help="Number of CPU cores to use.") |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
39 parser.add_argument("-l", "--local_minimum", action='store_true') |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
40 parser.add_argument("-s", "--search_start", required=False, type=int, default=50, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
41 help="Start search for breakpoint n nucleotides downstream of UTR start") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
42 parser.add_argument("-ct", "--coverage_threshold", required=False, type=float, default=20, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
43 help="minimum coverage in each aligment to be considered for determining breakpoints") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
44 parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
45 help="Write bedfile with coordinates of breakpoint positions to supplied path.") |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
46 parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.3') |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
47 parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
48 help="If plot_path is specified will write a coverage plot for every UTR in that directory.") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
49 parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'), |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
50 help="Write an html file to the specified location. Only to be used within a galaxy wrapper") |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
51 return parser.parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
52 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
53 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
54 class UtrFinder(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
55 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
56 This seems to be the main caller. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
57 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
58 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
59 def __init__(self, args): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
60 self.control_alignments = [file for file in args.control_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
61 self.treatment_alignments = [file for file in args.treatment_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
62 self.n_cpus = args.cpu |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
63 self.search_start = args.search_start |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
64 self.coverage_threshold = args.coverage_threshold |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
65 self.local_minimum = args.local_minimum |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
66 self.plot_path = args.plot_path |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
67 self.html_file = args.html_file |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
68 self.utr = args.utr_bed_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
69 self.gtf_fields = filter_utr.get_gtf_fields() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
70 self.result_file = args.output_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
71 self.all_alignments = self.control_alignments + self.treatment_alignments |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
72 self.alignment_names = OrderedDict(( file, os.path.basename(file)) for file in self.all_alignments ) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
73 self.num_samples = len(self.all_alignments) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
74 self.utr_dict = self.get_utr_dict(0.2) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
75 self.dump_utr_dict_to_bedfile() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
76 print "Established dictionary of 3\'UTRs" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
77 self.coverage_files = self.run_bedtools_coverage() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
78 self.utr_coverages = self.read_coverage_result() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
79 print "Established dictionary of 3\'UTR coverages" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
80 self.coverage_weights = self.get_coverage_weights() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
81 self.result_tuple = self.get_result_tuple() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
82 self.result_d = self.calculate_apa_ratios() |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
83 self.results = self.order_by_p() |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
84 self.write_results() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
85 if args.breakpoint_bed: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
86 self.bed_output = args.breakpoint_bed |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
87 self.write_bed() |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
88 if self.plot_path: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
89 self.write_html() |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
90 |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
91 def order_by_p(self): |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
92 results = [result for result in self.result_d.itervalues()] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
93 p_values = np.array([ result.p_value for result in self.result_d.itervalues() ]) |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
94 adj_p_values = mc.fdrcorrection0(p_values, 0.05)[1] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
95 sort_index = np.argsort(adj_p_values) |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
96 results = [ results[i]._replace(adj_p_value=adj_p_values[i]) for i in sort_index ] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
97 return results |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
98 |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
99 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
100 def dump_utr_dict_to_bedfile(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
101 w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
102 for gene, utr in self.utr_dict.iteritems(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
103 w.writerow([utr["chr"], utr["new_start"]-1, utr["new_end"], gene, ".", utr["strand"]]) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
104 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
105 def run_bedtools_coverage(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
106 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
107 Use bedtools coverage to generate pileup data for all alignment files for the regions specified in utr_dict. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
108 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
109 cmds = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
110 for alignment_file in self.all_alignments: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
111 cmd = "sort -k1,1 -k2,2n tmp_bedfile.bed | " |
16
f8bb40b2ff31
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
15
diff
changeset
|
112 cmd = cmd + "bedtools coverage -d -s -abam {alignment_file} -b stdin |" \ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
113 " cut -f 4,7,8 > coverage_file_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
114 alignment_file = alignment_file, alignment_name= self.alignment_names[alignment_file] ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
115 cmds.append(cmd) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
116 subprocesses = [subprocess.Popen([cmd], shell=True) for cmd in cmds] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
117 [p.wait() for p in subprocesses] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
118 coverage_files = ["gene_position_coverage_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
119 alignment_name = self.alignment_names[alignment_file]) for alignment_file in self.all_alignments ] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
120 return coverage_files |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
121 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
122 def read_coverage_result(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
123 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
124 Read coverages back in and store as dictionary of numpy arrays |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
125 """ |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
126 coverage_dict = { gene: [np.zeros(utr_d["new_end"]+1-utr_d["new_start"]) for name in self.alignment_names.values() ] for gene, utr_d in self.utr_dict.items() } |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
127 for i, alignment_name in enumerate(self.alignment_names.values()): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
128 with open("coverage_file_{alignment_name}".format(alignment_name = alignment_name)) as coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
129 for line in coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
130 gene, position, coverage= line.strip().split("\t") |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
131 coverage_dict[gene][i][int(position)-1] = coverage |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
132 for utr_d in self.utr_dict.values(): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
133 if utr_d["strand"] == "-": |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
134 for i, alignment_name in enumerate(self.alignment_names.values()): |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
135 coverage_dict[gene][i] = coverage_dict[gene][i][::-1] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
136 return coverage_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
137 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
138 def get_utr_dict(self, shift): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
139 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
140 The utr end is extended by UTR length * shift, to discover novel distal polyA sites. |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
141 Set to 0 to disable. |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
142 """ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
143 utr_dict = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
144 for line in self.utr: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
145 if not line.startswith("#"): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
146 filter_utr.get_feature_dict( line=line, gtf_fields=self.gtf_fields, utr_dict=utr_dict, feature="UTR" ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
147 gene, utr_d = utr_dict.popitem() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
148 utr_d = utr_d[0] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
149 end_shift = int(round(abs(utr_d["start"] - utr_d["end"]) * shift)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
150 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
151 utr_d["new_end"] = utr_d["end"] - end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
152 utr_d["new_start"] = utr_d["start"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
153 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
154 utr_d["new_end"] = utr_d["end"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
155 utr_d["new_start"] = utr_d["start"] + end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
156 if utr_d["new_start"] + 50 < utr_d["new_end"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
157 utr_dict[gene] = utr_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
158 return utr_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
159 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
160 def get_coverage_weights(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
161 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
162 Return weights for normalizing coverage. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
163 utr_coverage is still confusing. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
164 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
165 coverage_per_alignment = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
166 for utr in self.utr_coverages.itervalues(): # TODO: be smarter about this. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
167 utr_coverage = [] |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
168 for vector in utr: |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
169 utr_coverage.append(np.sum(vector)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
170 coverage_per_alignment.append(utr_coverage) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
171 coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ]) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
172 coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? Or even no normalization! |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
173 return coverage_weights |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
174 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
175 def get_result_tuple(self): |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
176 static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "adj_p_value", "breakpoint", |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
177 "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
178 samples_desc = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
179 for statistic in ["coverage_long", "coverage_short", "percent_long"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
180 for i, sample in enumerate(self.control_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
181 samples_desc.append("control_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
182 for i, sample in enumerate(self.treatment_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
183 samples_desc.append("treatment_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
184 return namedtuple("result", static_desc + samples_desc) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
185 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
186 def calculate_apa_ratios(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
187 result_d = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
188 arg_d = {"result_tuple": self.result_tuple, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
189 "coverage_weights":self.coverage_weights, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
190 "num_samples":self.num_samples, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
191 "num_control":len(self.control_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
192 "num_treatment":len(self.treatment_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
193 "result_d":result_d} |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
194 pool = Pool(self.n_cpus) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
195 tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments), |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
196 len(self.treatment_alignments), self.search_start, self.local_minimum, self.coverage_threshold) \ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
197 for utr, utr_d in self.utr_dict.iteritems() ] |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
198 #processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
199 #result_list = [res.get() for res in processed_tasks] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
200 result_list = [calculate_all_utr(*t) for t in tasks] # uncomment for easier debugging |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
201 for res_control, res_treatment in result_list: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
202 if not res_control: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
203 continue |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
204 for i, result in enumerate(res_control): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
205 if isinstance(result, dict): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
206 t = self.result_tuple(**result) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
207 result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
208 for i, result in enumerate(res_treatment): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
209 if isinstance(result, dict): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
210 t = self.result_tuple(**result) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
211 result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
212 return result_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
213 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
214 def write_results(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
215 w = csv.writer(self.result_file, delimiter='\t') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
216 header = list(self.result_tuple._fields) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
217 header[0] = "#chr" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
218 w.writerow(header) # field header |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
219 w.writerows( self.results) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
220 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
221 def write_html(self): |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
222 output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.results] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
223 if self.html_file: |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
224 self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value", "adj_p_value"], tablefmt="html")) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
225 else: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
226 with open(os.path.join(self.plot_path, "index.html"), "w") as html_file: |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
227 html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value", "adj_p_value"], tablefmt="html")) |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
228 html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value", "adj_p_value"], tablefmt="html")) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
229 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
230 def write_bed(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
231 w = csv.writer(self.bed_output, delimiter='\t') |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
232 bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.results] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
233 w.writerows(bed) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
234 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
235 |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
236 def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, local_minimum, coverage_threshold): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
237 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
238 is_reverse = False |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
239 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
240 is_reverse = True |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
241 control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \ |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
242 optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, local_minimum, coverage_threshold, num_control) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
243 res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse, |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
244 num_control, num_treatment) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
245 res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
246 num_control, num_treatment) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
247 return res_control, res_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
248 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
249 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
250 def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
251 abundances, is_reverse, num_control, num_treatment): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
252 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
253 Takes in a result dictionary res and fills the necessary fields |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
254 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
255 if not breakpoints: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
256 return False |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
257 result = [] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
258 for breakpoint, abundance in zip(breakpoints, abundances): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
259 res = {} |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
260 res["adj_p_value"] = "NA" |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
261 long_coverage_vector, short_coverage_vector = abundance |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
262 percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
263 for i in range(num_control): |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
264 res["control_{i}_coverage_long".format(i=i)] = long_coverage_vector[i] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
265 res["control_{i}_coverage_short".format(i=i)] = short_coverage_vector[i] |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
266 res["control_{i}_percent_long".format(i=i)] = percentage_long[i] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
267 for k in range(num_treatment): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
268 i = k + num_control |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
269 res["treatment_{i}_coverage_long".format(i=k)] = long_coverage_vector[i] |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
270 res["treatment_{i}_coverage_short".format(i=k)] = short_coverage_vector[i] |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
271 res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
272 res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:]) |
14
ea5d573d6d40
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
13
diff
changeset
|
273 control_mean_percent = np.mean(percentage_long[:num_control]) |
ea5d573d6d40
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
13
diff
changeset
|
274 treatment_mean_percent = np.mean(percentage_long[num_control:]) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
275 res["chr"] = utr_d["chr"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
276 res["start"] = utr_d["start"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
277 res["end"] = utr_d["end"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
278 res["strand"] = utr_d["strand"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
279 if is_reverse: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
280 breakpoint = utr_d["new_end"] - breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
281 else: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
282 breakpoint = utr_d["new_start"] + breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
283 res["breakpoint"] = breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
284 res["breakpoint_type"] = breakpoint_type |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
285 res["control_mean_percent"] = control_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
286 res["treatment_mean_percent"] = treatment_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
287 res["gene"] = utr |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
288 result.append(res) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
289 return result |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
290 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
291 |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
292 def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, local_minimum, coverage_threshold, num_control): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
293 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
294 We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
295 at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample. |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
296 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
297 num_samples = len(utr_coverage) |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
298 normalized_utr_coverage = np.array(utr_coverage)/np.expand_dims(coverage_weigths, axis=1) |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
299 start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage] # filters threshold on mean coverage over first 100 nt |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
300 is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why? |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
301 is_above_length = UTR_end - UTR_start >= 150 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
302 if (is_above_threshold) and (is_above_length): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
303 search_end = UTR_end - UTR_start |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
304 breakpoints = range(search_start, search_end + 1) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
305 mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
306 mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
307 if plot_path: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
308 plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
309 if len(mse_list) > 0: |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
310 return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples, local_minimum) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
311 return False, False, False, False |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
312 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
313 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
314 def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
315 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
316 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
317 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
318 fig = plt.figure(figsize=(8, 8)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
319 gs = gridspec.GridSpec(2, 1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
320 ax1 = plt.subplot(gs[0, :]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
321 ax2 = plt.subplot(gs[1, :]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
322 ax1.set_title("mean-squared error plot") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
323 ax1.set_ylabel("mean-squared error") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
324 ax1.set_xlabel("nt after UTR start") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
325 ax2.set_title("coverage plot") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
326 ax2.set_xlabel("nt after UTR start") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
327 ax2.set_ylabel("normalized nucleotide coverage") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
328 mse_control = [ condition[0] for condition in mse_list] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
329 mse_treatment = [ condition[1] for condition in mse_list] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
330 minima_control = get_minima(np.array(mse_control)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
331 minima_treatment = get_minima(np.array(mse_treatment)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
332 control = normalized_utr_coverage[:num_control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
333 treatment = normalized_utr_coverage[num_control:] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
334 ax1.plot(mse_control, "b-") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
335 ax1.plot(mse_treatment, "r-") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
336 [ax2.plot(cov, "b-") for cov in control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
337 [ax2.plot(cov, "r-") for cov in treatment] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
338 [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
339 ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
340 [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
341 ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
342 fig.add_subplot(ax1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
343 fig.add_subplot(ax2) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
344 gs.tight_layout(fig) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
345 fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr))) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
346 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
347 |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
348 def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples, local_minimum): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
349 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
350 Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
351 in mse_list |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
352 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
353 mse_control = np.array([mse[0] for mse in mse_list]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
354 mse_treatment = np.array([mse[1] for mse in mse_list]) |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
355 control_breakpoints = list(get_minima(mse_control, local_minimum)) |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
356 treatment_breakpoints = list(get_minima(mse_treatment, local_minimum)) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
357 control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
358 treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
359 return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
360 |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
361 def get_minima(a, local_minimum=False): |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
362 """ |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
363 get minima for numpy array a. If local is false, only return absolute minimum |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
364 """ |
17
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
365 if not local_minimum: |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
366 return np.where(a == a.min()) |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
367 else: |
917a2f7ab841
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
16
diff
changeset
|
368 return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
369 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
370 def estimate_mse(cov, bp, num_samples, num_control): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
371 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
372 get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
373 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
374 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
375 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
376 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
377 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
378 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
379 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
380 square_mean_centered_short_utr_vector = (short_utr_vector[:num_samples] - mean_short_utr[:, np.newaxis] )**2 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
381 square_mean_centered_long_utr_vector = (long_utr_vector[:num_samples] - mean_long_utr[:, np.newaxis])**2 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
382 mse_control = np.mean(np.append(square_mean_centered_long_utr_vector[:num_control], square_mean_centered_short_utr_vector[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
383 mse_treatment = np.mean(np.append(square_mean_centered_long_utr_vector[num_control:], square_mean_centered_short_utr_vector[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
384 return mse_control, mse_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
385 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
386 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
387 def estimate_abundance(cov, bp, num_samples): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
388 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
389 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
390 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
391 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
392 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
393 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
394 return mean_long_utr, mean_short_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
395 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
396 def stat_test(a,b): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
397 return stats.ttest_ind(a,b) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
398 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
399 def gene_str_to_link(str): |
14
ea5d573d6d40
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
13
diff
changeset
|
400 return "<a href=\"{str}.svg\" type=\"image/svg+xml\">{str}</a>".format(str=str) |
ea5d573d6d40
planemo upload for repository https://github.com/mvdbeek/dapars commit b1b007c561ea6c9db145c88b6b128d66ecd05e24-dirty
mvdbeek
parents:
13
diff
changeset
|
401 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
402 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
403 if __name__ == '__main__': |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
404 args = parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
405 find_utr = UtrFinder(args) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
406 |