Mercurial > repos > mvdbeek > dapars
annotate dapars.py @ 11:b2e46d8f9487 draft
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
author | mvdbeek |
---|---|
date | Thu, 29 Oct 2015 18:28:03 -0400 |
parents | a5d8b08af089 |
children | 538c4e2b423e |
rev | line source |
---|---|
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
1 import argparse |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
2 import os |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
3 import csv |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
4 import numpy as np |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
5 from scipy import stats |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
6 from collections import OrderedDict, namedtuple |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
7 import filter_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
8 import subprocess |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
9 from multiprocessing import Pool |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
10 import warnings |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
11 import matplotlib.pyplot as plt |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
12 import matplotlib.gridspec as gridspec |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
13 from tabulate import tabulate |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
14 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
15 def directory_path(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
16 if os.path.exists(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
17 return str |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
18 else: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
19 os.mkdir(str) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
20 return str |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
21 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
22 def parse_args(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
23 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
24 Returns floating point values except for input files. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
25 My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
26 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
27 parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
28 parser.add_argument("-c", "--control_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
29 help="Alignment files in BAM format from control condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
30 parser.add_argument("-t", "--treatment_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
31 help="Alignment files in BAM format from treatment condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
32 parser.add_argument("-u", "--utr_bed_file", required=True, type=file, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
33 help="Bed file describing longest 3UTR positions") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
34 parser.add_argument("-o", "--output_file", required=True, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
35 help="file containing output") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
36 parser.add_argument("-cpu", required=False, type=int, default=1, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
37 help="Number of CPU cores to use.") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
38 parser.add_argument("-s", "--search_start", required=False, type=int, default=50, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
39 help="Start search for breakpoint n nucleotides downstream of UTR start") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
40 parser.add_argument("-ct", "--coverage_threshold", required=False, type=float, default=20, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
41 help="minimum coverage in each aligment to be considered for determining breakpoints") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
42 parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
43 help="Write bedfile with coordinates of breakpoint positions to supplied path.") |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
44 parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.0') |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
45 parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
46 help="If plot_path is specified will write a coverage plot for every UTR in that directory.") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
47 parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'), |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
48 help="Write an html file to the specified location. Only to be used within a galaxy wrapper") |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
49 return parser.parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
50 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
51 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
52 class UtrFinder(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
53 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
54 This seems to be the main caller. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
55 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
56 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
57 def __init__(self, args): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
58 self.control_alignments = [file for file in args.control_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
59 self.treatment_alignments = [file for file in args.treatment_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
60 self.n_cpus = args.cpu |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
61 self.search_start = args.search_start |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
62 self.coverage_threshold = args.coverage_threshold |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
63 self.plot_path = args.plot_path |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
64 self.html_file = args.html_file |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
65 self.utr = args.utr_bed_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
66 self.gtf_fields = filter_utr.get_gtf_fields() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
67 self.result_file = args.output_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
68 self.all_alignments = self.control_alignments + self.treatment_alignments |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
69 self.alignment_names = { file: os.path.basename(file) for file in self.all_alignments } |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
70 self.num_samples = len(self.all_alignments) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
71 self.utr_dict = self.get_utr_dict(0.2) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
72 self.dump_utr_dict_to_bedfile() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
73 print "Established dictionary of 3\'UTRs" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
74 self.coverage_files = self.run_bedtools_coverage() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
75 self.utr_coverages = self.read_coverage_result() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
76 print "Established dictionary of 3\'UTR coverages" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
77 self.coverage_weights = self.get_coverage_weights() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
78 self.result_tuple = self.get_result_tuple() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
79 self.result_d = self.calculate_apa_ratios() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
80 self.write_results() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
81 if args.breakpoint_bed: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
82 self.bed_output = args.breakpoint_bed |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
83 self.write_bed() |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
84 if self.plot_path: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
85 self.write_html() |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
86 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
87 def dump_utr_dict_to_bedfile(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
88 w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
89 for gene, utr in self.utr_dict.iteritems(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
90 w.writerow([utr["chr"], utr["new_start"]-1, utr["new_end"], gene, ".", utr["strand"]]) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
91 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
92 def run_bedtools_coverage(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
93 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
94 Use bedtools coverage to generate pileup data for all alignment files for the regions specified in utr_dict. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
95 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
96 coverage_files = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
97 cmds = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
98 for alignment_file in self.all_alignments: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
99 cmd = "sort -k1,1 -k2,2n tmp_bedfile.bed | " |
3
aca85eb3eb5b
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
2
diff
changeset
|
100 cmd = cmd + "bedtools coverage -d -s -abam {alignment_file} -b stdin |" \ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
101 " cut -f 4,7,8 > coverage_file_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
102 alignment_file = alignment_file, alignment_name= self.alignment_names[alignment_file] ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
103 cmds.append(cmd) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
104 pool = Pool(self.n_cpus) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
105 subprocesses = [subprocess.Popen([cmd], shell=True) for cmd in cmds] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
106 [p.wait() for p in subprocesses] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
107 coverage_files = ["gene_position_coverage_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
108 alignment_name = self.alignment_names[alignment_file]) for alignment_file in self.all_alignments ] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
109 return coverage_files |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
110 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
111 def read_coverage_result(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
112 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
113 Read coverages back in and store as dictionary of numpy arrays |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
114 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
115 coverage_dict = { gene: { name: np.zeros(utr_d["new_end"]+1-utr_d["new_start"]) for name in self.alignment_names.itervalues() } for gene, utr_d in self.utr_dict.iteritems() } |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
116 for alignment_name in self.alignment_names.itervalues(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
117 with open("coverage_file_{alignment_name}".format(alignment_name = alignment_name)) as coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
118 for line in coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
119 gene, position, coverage= line.strip().split("\t") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
120 coverage_dict[gene][alignment_name][int(position)-1] = coverage |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
121 for utr_d in self.utr_dict.itervalues(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
122 if utr_d["strand"] == "-": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
123 for alignment_name in self.alignment_names.values(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
124 coverage_dict[gene][alignment_name] = coverage_dict[gene][alignment_name][::-1] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
125 return coverage_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
126 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
127 def get_utr_dict(self, shift): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
128 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
129 The utr end is extended by UTR length * shift, to discover novel distal polyA sites. |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
130 Set to 0 to disable. |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
131 """ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
132 utr_dict = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
133 for line in self.utr: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
134 if not line.startswith("#"): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
135 filter_utr.get_feature_dict( line=line, gtf_fields=self.gtf_fields, utr_dict=utr_dict, feature="UTR" ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
136 gene, utr_d = utr_dict.popitem() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
137 utr_d = utr_d[0] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
138 end_shift = int(round(abs(utr_d["start"] - utr_d["end"]) * shift)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
139 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
140 utr_d["new_end"] = utr_d["end"] - end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
141 utr_d["new_start"] = utr_d["start"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
142 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
143 utr_d["new_end"] = utr_d["end"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
144 utr_d["new_start"] = utr_d["start"] + end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
145 if utr_d["new_start"] + 50 < utr_d["new_end"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
146 utr_dict[gene] = utr_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
147 return utr_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
148 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
149 def get_coverage_weights(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
150 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
151 Return weights for normalizing coverage. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
152 utr_coverage is still confusing. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
153 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
154 coverage_per_alignment = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
155 for utr in self.utr_coverages.itervalues(): # TODO: be smarter about this. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
156 utr_coverage = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
157 for vector in utr.itervalues(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
158 utr_coverage.append(np.sum(vector)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
159 coverage_per_alignment.append(utr_coverage) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
160 coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ]) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
161 coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? Or even no normalization! |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
162 return coverage_weights |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
163 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
164 def get_result_tuple(self): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
165 static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "breakpoint", |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
166 "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
167 samples_desc = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
168 for statistic in ["coverage_long", "coverage_short", "percent_long"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
169 for i, sample in enumerate(self.control_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
170 samples_desc.append("control_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
171 for i, sample in enumerate(self.treatment_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
172 samples_desc.append("treatment_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
173 return namedtuple("result", static_desc + samples_desc) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
174 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
175 def calculate_apa_ratios(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
176 result_d = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
177 arg_d = {"result_tuple": self.result_tuple, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
178 "coverage_weights":self.coverage_weights, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
179 "num_samples":self.num_samples, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
180 "num_control":len(self.control_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
181 "num_treatment":len(self.treatment_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
182 "result_d":result_d} |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
183 pool = Pool(self.n_cpus) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
184 tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments), |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
185 len(self.treatment_alignments), self.search_start, self.coverage_threshold) \ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
186 for utr, utr_d in self.utr_dict.iteritems() ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
187 processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
188 result_list = [res.get() for res in processed_tasks] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
189 for res_control, res_treatment in result_list: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
190 if not res_control: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
191 continue |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
192 for i, result in enumerate(res_control): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
193 if isinstance(result, dict): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
194 t = self.result_tuple(**result) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
195 result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
196 for i, result in enumerate(res_treatment): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
197 if isinstance(result, dict): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
198 t = self.result_tuple(**result) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
199 result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
200 return result_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
201 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
202 def write_results(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
203 w = csv.writer(self.result_file, delimiter='\t') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
204 header = list(self.result_tuple._fields) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
205 header[0] = "#chr" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
206 w.writerow(header) # field header |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
207 w.writerows( self.result_d.values()) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
208 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
209 def write_html(self): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
210 output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.result_d.itervalues()] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
211 if self.html_file: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
212 self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
213 else: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
214 with open(os.path.join(self.plot_path, "index.html"), "w") as html_file: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
215 html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
216 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
217 def write_bed(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
218 w = csv.writer(self.bed_output, delimiter='\t') |
4
73b932244237
planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents:
3
diff
changeset
|
219 bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
220 w.writerows(bed) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
221 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
222 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
223 def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, coverage_threshold): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
224 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
225 is_reverse = False |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
226 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
227 is_reverse = True |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
228 control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
229 optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, coverage_threshold, num_control) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
230 res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse, |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
231 num_control, num_treatment) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
232 res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
233 num_control, num_treatment) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
234 return res_control, res_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
235 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
236 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
237 def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
238 abundances, is_reverse, num_control, num_treatment): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
239 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
240 Takes in a result dictionary res and fills the necessary fields |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
241 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
242 if not breakpoints: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
243 return False |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
244 result = [] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
245 for breakpoint, abundance in zip(breakpoints, abundances): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
246 res = {} |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
247 long_coverage_vector = abundance[0] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
248 short_coverage_vector = abundance[1] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
249 percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
250 for i in range(num_control): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
251 res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
252 res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
253 res["control_{i}_percent_long".format(i=i)] = percentage_long[i] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
254 for k in range(num_treatment): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
255 i = k + num_control |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
256 res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
257 res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
258 res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
259 res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:]) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
260 control_mean_percent = np.mean(np.array(percentage_long[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
261 treatment_mean_percent = np.mean(np.array(percentage_long[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
262 res["chr"] = utr_d["chr"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
263 res["start"] = utr_d["start"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
264 res["end"] = utr_d["end"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
265 res["strand"] = utr_d["strand"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
266 if is_reverse: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
267 breakpoint = utr_d["new_end"] - breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
268 else: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
269 breakpoint = utr_d["new_start"] + breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
270 res["breakpoint"] = breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
271 res["breakpoint_type"] = breakpoint_type |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
272 res["control_mean_percent"] = control_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
273 res["treatment_mean_percent"] = treatment_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
274 res["gene"] = utr |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
275 result.append(res) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
276 return result |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
277 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
278 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
279 def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
280 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
281 We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
282 at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample. |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
283 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
284 num_samples = len(utr_coverage) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
285 normalized_utr_coverage = np.array(utr_coverage.values())/np.expand_dims(coverage_weigths, axis=1) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
286 start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()] # filters threshold on mean coverage over first 100 nt |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
287 is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why? |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
288 is_above_length = UTR_end - UTR_start >= 150 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
289 if (is_above_threshold) and (is_above_length): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
290 search_end = UTR_end - UTR_start |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
291 breakpoints = range(search_start, search_end + 1) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
292 mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
293 mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
294 if plot_path: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
295 plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
296 if len(mse_list) > 0: |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
297 return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
298 return False, False, False, False |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
299 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
300 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
301 def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
302 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
303 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
304 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
305 fig = plt.figure(figsize=(8, 8)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
306 gs = gridspec.GridSpec(2, 1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
307 ax1 = plt.subplot(gs[0, :]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
308 ax2 = plt.subplot(gs[1, :]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
309 ax1.set_title("mean-squared error plot") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
310 ax1.set_ylabel("mean-squared error") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
311 ax1.set_xlabel("nt after UTR start") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
312 ax2.set_title("coverage plot") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
313 ax2.set_xlabel("nt after UTR start") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
314 ax2.set_ylabel("normalized nucleotide coverage") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
315 mse_control = [ condition[0] for condition in mse_list] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
316 mse_treatment = [ condition[1] for condition in mse_list] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
317 minima_control = get_minima(np.array(mse_control)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
318 minima_treatment = get_minima(np.array(mse_treatment)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
319 control = normalized_utr_coverage[:num_control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
320 treatment = normalized_utr_coverage[num_control:] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
321 ax1.plot(mse_control, "b-") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
322 ax1.plot(mse_treatment, "r-") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
323 [ax2.plot(cov, "b-") for cov in control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
324 [ax2.plot(cov, "r-") for cov in treatment] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
325 [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
326 ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
327 [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
328 ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
329 fig.add_subplot(ax1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
330 fig.add_subplot(ax2) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
331 gs.tight_layout(fig) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
332 fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr))) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
333 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
334 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
335 def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
336 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
337 Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
338 in mse_list |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
339 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
340 mse_control = np.array([mse[0] for mse in mse_list]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
341 mse_treatment = np.array([mse[1] for mse in mse_list]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
342 control_breakpoints = list(get_minima(mse_control)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
343 treatment_breakpoints = list(get_minima(mse_treatment)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
344 control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
345 treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
346 return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
347 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
348 def get_minima(a): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
349 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
350 get minima for numpy array a |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
351 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
352 return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
353 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
354 def estimate_mse(cov, bp, num_samples, num_control): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
355 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
356 get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
357 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
358 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
359 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
360 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
361 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
362 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
363 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
364 square_mean_centered_short_utr_vector = (short_utr_vector[:num_samples] - mean_short_utr[:, np.newaxis] )**2 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
365 square_mean_centered_long_utr_vector = (long_utr_vector[:num_samples] - mean_long_utr[:, np.newaxis])**2 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
366 mse_control = np.mean(np.append(square_mean_centered_long_utr_vector[:num_control], square_mean_centered_short_utr_vector[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
367 mse_treatment = np.mean(np.append(square_mean_centered_long_utr_vector[num_control:], square_mean_centered_short_utr_vector[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
368 return mse_control, mse_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
369 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
370 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
371 def estimate_abundance(cov, bp, num_samples): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
372 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
373 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
374 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
375 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
376 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
377 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
378 return mean_long_utr, mean_short_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
379 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
380 def stat_test(a,b): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
381 return stats.ttest_ind(a,b) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
382 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
383 def gene_str_to_link(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
384 return "<a href=\"{str}.svg\" type=\"image/svg+xml\" target=\"_blank\">{str}</a>".format(str=str) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
385 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
386 if __name__ == '__main__': |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
387 args = parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
388 find_utr = UtrFinder(args) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
389 |