Mercurial > repos > mvdbeek > dapars
annotate dapars.py @ 13:538c4e2b423e draft
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
author | mvdbeek |
---|---|
date | Fri, 30 Oct 2015 04:59:15 -0400 |
parents | a5d8b08af089 |
children | ea5d573d6d40 |
rev | line source |
---|---|
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
1 import argparse |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
2 import os |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
3 import csv |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
4 import numpy as np |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
5 from scipy import stats |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
6 from collections import OrderedDict, namedtuple |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
7 import filter_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
8 import subprocess |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
9 from multiprocessing import Pool |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
10 import warnings |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
11 import matplotlib.pyplot as plt |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
12 import matplotlib.gridspec as gridspec |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
13 from tabulate import tabulate |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
14 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
15 def directory_path(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
16 if os.path.exists(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
17 return str |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
18 else: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
19 os.mkdir(str) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
20 return str |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
21 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
22 def parse_args(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
23 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
24 Returns floating point values except for input files. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
25 My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
26 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
27 parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
28 parser.add_argument("-c", "--control_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
29 help="Alignment files in BAM format from control condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
30 parser.add_argument("-t", "--treatment_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
31 help="Alignment files in BAM format from treatment condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
32 parser.add_argument("-u", "--utr_bed_file", required=True, type=file, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
33 help="Bed file describing longest 3UTR positions") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
34 parser.add_argument("-o", "--output_file", required=True, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
35 help="file containing output") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
36 parser.add_argument("-cpu", required=False, type=int, default=1, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
37 help="Number of CPU cores to use.") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
38 parser.add_argument("-s", "--search_start", required=False, type=int, default=50, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
39 help="Start search for breakpoint n nucleotides downstream of UTR start") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
40 parser.add_argument("-ct", "--coverage_threshold", required=False, type=float, default=20, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
41 help="minimum coverage in each aligment to be considered for determining breakpoints") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
42 parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
43 help="Write bedfile with coordinates of breakpoint positions to supplied path.") |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
44 parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.0') |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
45 parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
46 help="If plot_path is specified will write a coverage plot for every UTR in that directory.") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
47 parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'), |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
48 help="Write an html file to the specified location. Only to be used within a galaxy wrapper") |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
49 return parser.parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
50 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
51 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
52 class UtrFinder(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
53 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
54 This seems to be the main caller. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
55 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
56 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
57 def __init__(self, args): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
58 self.control_alignments = [file for file in args.control_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
59 self.treatment_alignments = [file for file in args.treatment_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
60 self.n_cpus = args.cpu |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
61 self.search_start = args.search_start |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
62 self.coverage_threshold = args.coverage_threshold |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
63 self.plot_path = args.plot_path |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
64 self.html_file = args.html_file |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
65 self.utr = args.utr_bed_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
66 self.gtf_fields = filter_utr.get_gtf_fields() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
67 self.result_file = args.output_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
68 self.all_alignments = self.control_alignments + self.treatment_alignments |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
69 self.alignment_names = OrderedDict(( file, os.path.basename(file)) for file in self.all_alignments ) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
70 self.num_samples = len(self.all_alignments) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
71 self.utr_dict = self.get_utr_dict(0.2) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
72 self.dump_utr_dict_to_bedfile() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
73 print "Established dictionary of 3\'UTRs" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
74 self.coverage_files = self.run_bedtools_coverage() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
75 self.utr_coverages = self.read_coverage_result() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
76 print "Established dictionary of 3\'UTR coverages" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
77 self.coverage_weights = self.get_coverage_weights() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
78 self.result_tuple = self.get_result_tuple() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
79 self.result_d = self.calculate_apa_ratios() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
80 self.write_results() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
81 if args.breakpoint_bed: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
82 self.bed_output = args.breakpoint_bed |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
83 self.write_bed() |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
84 if self.plot_path: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
85 self.write_html() |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
86 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
87 def dump_utr_dict_to_bedfile(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
88 w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
89 for gene, utr in self.utr_dict.iteritems(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
90 w.writerow([utr["chr"], utr["new_start"]-1, utr["new_end"], gene, ".", utr["strand"]]) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
91 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
92 def run_bedtools_coverage(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
93 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
94 Use bedtools coverage to generate pileup data for all alignment files for the regions specified in utr_dict. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
95 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
96 cmds = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
97 for alignment_file in self.all_alignments: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
98 cmd = "sort -k1,1 -k2,2n tmp_bedfile.bed | " |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
99 cmd = cmd + "bedtools coverage -d -s -abam {alignment_file} -b tmp_bedfile.bed |" \ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
100 " cut -f 4,7,8 > coverage_file_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
101 alignment_file = alignment_file, alignment_name= self.alignment_names[alignment_file] ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
102 cmds.append(cmd) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
103 subprocesses = [subprocess.Popen([cmd], shell=True) for cmd in cmds] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
104 [p.wait() for p in subprocesses] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
105 coverage_files = ["gene_position_coverage_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
106 alignment_name = self.alignment_names[alignment_file]) for alignment_file in self.all_alignments ] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
107 return coverage_files |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
108 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
109 def read_coverage_result(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
110 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
111 Read coverages back in and store as dictionary of numpy arrays |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
112 """ |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
113 coverage_dict = { gene: [np.zeros(utr_d["new_end"]+1-utr_d["new_start"]) for name in self.alignment_names.values() ] for gene, utr_d in self.utr_dict.items() } |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
114 for i, alignment_name in enumerate(self.alignment_names.values()): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
115 with open("coverage_file_{alignment_name}".format(alignment_name = alignment_name)) as coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
116 for line in coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
117 gene, position, coverage= line.strip().split("\t") |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
118 coverage_dict[gene][i][int(position)-1] = coverage |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
119 for utr_d in self.utr_dict.values(): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
120 if utr_d["strand"] == "-": |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
121 for i, alignment_name in enumerate(self.alignment_names.values()): |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
122 coverage_dict[gene][i] = coverage_dict[gene][i][::-1] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
123 return coverage_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
124 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
125 def get_utr_dict(self, shift): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
126 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
127 The utr end is extended by UTR length * shift, to discover novel distal polyA sites. |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
128 Set to 0 to disable. |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
129 """ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
130 utr_dict = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
131 for line in self.utr: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
132 if not line.startswith("#"): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
133 filter_utr.get_feature_dict( line=line, gtf_fields=self.gtf_fields, utr_dict=utr_dict, feature="UTR" ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
134 gene, utr_d = utr_dict.popitem() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
135 utr_d = utr_d[0] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
136 end_shift = int(round(abs(utr_d["start"] - utr_d["end"]) * shift)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
137 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
138 utr_d["new_end"] = utr_d["end"] - end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
139 utr_d["new_start"] = utr_d["start"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
140 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
141 utr_d["new_end"] = utr_d["end"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
142 utr_d["new_start"] = utr_d["start"] + end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
143 if utr_d["new_start"] + 50 < utr_d["new_end"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
144 utr_dict[gene] = utr_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
145 return utr_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
146 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
147 def get_coverage_weights(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
148 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
149 Return weights for normalizing coverage. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
150 utr_coverage is still confusing. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
151 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
152 coverage_per_alignment = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
153 for utr in self.utr_coverages.itervalues(): # TODO: be smarter about this. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
154 utr_coverage = [] |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
155 for vector in utr: |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
156 utr_coverage.append(np.sum(vector)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
157 coverage_per_alignment.append(utr_coverage) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
158 coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ]) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
159 coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? Or even no normalization! |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
160 return coverage_weights |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
161 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
162 def get_result_tuple(self): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
163 static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "breakpoint", |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
164 "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
165 samples_desc = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
166 for statistic in ["coverage_long", "coverage_short", "percent_long"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
167 for i, sample in enumerate(self.control_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
168 samples_desc.append("control_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
169 for i, sample in enumerate(self.treatment_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
170 samples_desc.append("treatment_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
171 return namedtuple("result", static_desc + samples_desc) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
172 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
173 def calculate_apa_ratios(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
174 result_d = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
175 arg_d = {"result_tuple": self.result_tuple, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
176 "coverage_weights":self.coverage_weights, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
177 "num_samples":self.num_samples, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
178 "num_control":len(self.control_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
179 "num_treatment":len(self.treatment_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
180 "result_d":result_d} |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
181 pool = Pool(self.n_cpus) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
182 tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments), |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
183 len(self.treatment_alignments), self.search_start, self.coverage_threshold) \ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
184 for utr, utr_d in self.utr_dict.iteritems() ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
185 processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
186 result_list = [res.get() for res in processed_tasks] |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
187 #result_list = [calculate_all_utr(*t) for t in tasks] # uncomment for easier debugging |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
188 for res_control, res_treatment in result_list: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
189 if not res_control: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
190 continue |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
191 for i, result in enumerate(res_control): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
192 if isinstance(result, dict): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
193 t = self.result_tuple(**result) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
194 result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
195 for i, result in enumerate(res_treatment): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
196 if isinstance(result, dict): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
197 t = self.result_tuple(**result) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
198 result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
199 return result_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
200 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
201 def write_results(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
202 w = csv.writer(self.result_file, delimiter='\t') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
203 header = list(self.result_tuple._fields) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
204 header[0] = "#chr" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
205 w.writerow(header) # field header |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
206 w.writerows( self.result_d.values()) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
207 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
208 def write_html(self): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
209 output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.result_d.itervalues()] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
210 if self.html_file: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
211 self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
212 else: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
213 with open(os.path.join(self.plot_path, "index.html"), "w") as html_file: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
214 html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
215 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
216 def write_bed(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
217 w = csv.writer(self.bed_output, delimiter='\t') |
4
73b932244237
planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents:
3
diff
changeset
|
218 bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
219 w.writerows(bed) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
220 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
221 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
222 def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, coverage_threshold): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
223 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
224 is_reverse = False |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
225 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
226 is_reverse = True |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
227 control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
228 optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, coverage_threshold, num_control) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
229 res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse, |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
230 num_control, num_treatment) |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
231 res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
232 num_control, num_treatment) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
233 return res_control, res_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
234 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
235 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
236 def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type, |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
237 abundances, is_reverse, num_control, num_treatment): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
238 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
239 Takes in a result dictionary res and fills the necessary fields |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
240 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
241 if not breakpoints: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
242 return False |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
243 result = [] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
244 for breakpoint, abundance in zip(breakpoints, abundances): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
245 res = {} |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
246 long_coverage_vector = abundance[0] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
247 short_coverage_vector = abundance[1] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
248 percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
249 for i in range(num_control): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
250 res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
251 res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
252 res["control_{i}_percent_long".format(i=i)] = percentage_long[i] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
253 for k in range(num_treatment): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
254 i = k + num_control |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
255 res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
256 res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
257 res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
258 res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:]) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
259 control_mean_percent = np.mean(np.array(percentage_long[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
260 treatment_mean_percent = np.mean(np.array(percentage_long[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
261 res["chr"] = utr_d["chr"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
262 res["start"] = utr_d["start"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
263 res["end"] = utr_d["end"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
264 res["strand"] = utr_d["strand"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
265 if is_reverse: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
266 breakpoint = utr_d["new_end"] - breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
267 else: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
268 breakpoint = utr_d["new_start"] + breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
269 res["breakpoint"] = breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
270 res["breakpoint_type"] = breakpoint_type |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
271 res["control_mean_percent"] = control_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
272 res["treatment_mean_percent"] = treatment_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
273 res["gene"] = utr |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
274 result.append(res) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
275 return result |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
276 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
277 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
278 def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
279 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
280 We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
281 at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample. |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
282 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
283 num_samples = len(utr_coverage) |
13
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
284 normalized_utr_coverage = np.array(utr_coverage)/np.expand_dims(coverage_weigths, axis=1) |
538c4e2b423e
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42-dirty
mvdbeek
parents:
5
diff
changeset
|
285 start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage] # filters threshold on mean coverage over first 100 nt |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
286 is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why? |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
287 is_above_length = UTR_end - UTR_start >= 150 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
288 if (is_above_threshold) and (is_above_length): |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
289 search_end = UTR_end - UTR_start |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
290 breakpoints = range(search_start, search_end + 1) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
291 mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ] |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
292 mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
293 if plot_path: |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
294 plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
295 if len(mse_list) > 0: |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
296 return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
297 return False, False, False, False |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
298 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
299 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
300 def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
301 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
302 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
303 """ |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
304 fig = plt.figure(figsize=(8, 8)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
305 gs = gridspec.GridSpec(2, 1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
306 ax1 = plt.subplot(gs[0, :]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
307 ax2 = plt.subplot(gs[1, :]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
308 ax1.set_title("mean-squared error plot") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
309 ax1.set_ylabel("mean-squared error") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
310 ax1.set_xlabel("nt after UTR start") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
311 ax2.set_title("coverage plot") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
312 ax2.set_xlabel("nt after UTR start") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
313 ax2.set_ylabel("normalized nucleotide coverage") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
314 mse_control = [ condition[0] for condition in mse_list] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
315 mse_treatment = [ condition[1] for condition in mse_list] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
316 minima_control = get_minima(np.array(mse_control)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
317 minima_treatment = get_minima(np.array(mse_treatment)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
318 control = normalized_utr_coverage[:num_control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
319 treatment = normalized_utr_coverage[num_control:] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
320 ax1.plot(mse_control, "b-") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
321 ax1.plot(mse_treatment, "r-") |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
322 [ax2.plot(cov, "b-") for cov in control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
323 [ax2.plot(cov, "r-") for cov in treatment] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
324 [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
325 ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
326 [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
327 ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
328 fig.add_subplot(ax1) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
329 fig.add_subplot(ax2) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
330 gs.tight_layout(fig) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
331 fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr))) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
332 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
333 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
334 def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
335 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
336 Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
337 in mse_list |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
338 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
339 mse_control = np.array([mse[0] for mse in mse_list]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
340 mse_treatment = np.array([mse[1] for mse in mse_list]) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
341 control_breakpoints = list(get_minima(mse_control)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
342 treatment_breakpoints = list(get_minima(mse_treatment)) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
343 control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
344 treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints] |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
345 return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
346 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
347 def get_minima(a): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
348 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
349 get minima for numpy array a |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
350 """ |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
351 return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
352 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
353 def estimate_mse(cov, bp, num_samples, num_control): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
354 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
355 get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
356 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
357 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
358 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
359 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
360 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
361 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
362 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
363 square_mean_centered_short_utr_vector = (short_utr_vector[:num_samples] - mean_short_utr[:, np.newaxis] )**2 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
364 square_mean_centered_long_utr_vector = (long_utr_vector[:num_samples] - mean_long_utr[:, np.newaxis])**2 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
365 mse_control = np.mean(np.append(square_mean_centered_long_utr_vector[:num_control], square_mean_centered_short_utr_vector[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
366 mse_treatment = np.mean(np.append(square_mean_centered_long_utr_vector[num_control:], square_mean_centered_short_utr_vector[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
367 return mse_control, mse_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
368 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
369 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
370 def estimate_abundance(cov, bp, num_samples): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
371 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
372 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
373 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
374 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
375 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
376 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
377 return mean_long_utr, mean_short_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
378 |
5
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
379 def stat_test(a,b): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
380 return stats.ttest_ind(a,b) |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
381 |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
382 def gene_str_to_link(str): |
a5d8b08af089
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
mvdbeek
parents:
4
diff
changeset
|
383 return "<a href=\"{str}.svg\" type=\"image/svg+xml\" target=\"_blank\">{str}</a>".format(str=str) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
384 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
385 if __name__ == '__main__': |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
386 args = parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
387 find_utr = UtrFinder(args) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
388 |