annotate dapars.py @ 4:73b932244237 draft

planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
author mvdbeek
date Wed, 28 Oct 2015 06:22:18 -0400
parents aca85eb3eb5b
children a5d8b08af089
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
1 import argparse
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
2 import os
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
3 import csv
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
4 import numpy as np
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
5 from collections import OrderedDict, namedtuple
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
6 import filter_utr
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
7 import subprocess
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
8 from multiprocessing import Pool
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
9 import warnings
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
10
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
11
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
12 def parse_args():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
13 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
14 Returns floating point values except for input files.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
15 My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
16 :param argv:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
17 :return:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
18 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
19 parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage')
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
20 parser.add_argument("-c", "--control_alignments", nargs="+", required=True,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
21 help="Alignment files in BAM format from control condition")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
22 parser.add_argument("-t", "--treatment_alignments", nargs="+", required=True,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
23 help="Alignment files in BAM format from treatment condition")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
24 parser.add_argument("-u", "--utr_bed_file", required=True, type=file,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
25 help="Bed file describing longest 3UTR positions")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
26 parser.add_argument("-o", "--output_file", required=True, type=argparse.FileType('w'),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
27 help="file containing output")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
28 parser.add_argument("-cpu", required=False, type=int, default=1,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
29 help="Number of CPU cores to use.")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
30 parser.add_argument("-s", "--search_start", required=False, type=int, default=50,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
31 help="Start search for breakpoint n nucleotides downstream of UTR start")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
32 parser.add_argument("-ct", "--coverage_threshold", required=False, type=float, default=20,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
33 help="minimum coverage in each aligment to be considered for determining breakpoints")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
34 parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
35 help="Write bedfile with coordinates of breakpoint positions to supplied path.")
2
47b0044bc7f8 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 1
diff changeset
36 parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.1.5')
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
37 return parser.parse_args()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
38
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
39
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
40 class UtrFinder():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
41 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
42 This seems to be the main caller.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
43 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
44
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
45 def __init__(self, args):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
46 self.control_alignments = [file for file in args.control_alignments]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
47 self.treatment_alignments = [file for file in args.treatment_alignments]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
48 self.n_cpus = args.cpu
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
49 self.search_start = args.search_start
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
50 self.coverage_threshold = args.coverage_threshold
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
51 self.utr = args.utr_bed_file
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
52 self.gtf_fields = filter_utr.get_gtf_fields()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
53 self.result_file = args.output_file
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
54 self.all_alignments = self.control_alignments + self.treatment_alignments
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
55 self.alignment_names = { file: os.path.basename(file) for file in self.all_alignments }
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
56 self.num_samples = len(self.all_alignments)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
57 self.utr_dict = self.get_utr_dict(0.2)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
58 self.dump_utr_dict_to_bedfile()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
59 print "Established dictionary of 3\'UTRs"
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
60 self.coverage_files = self.run_bedtools_coverage()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
61 self.utr_coverages = self.read_coverage_result()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
62 print "Established dictionary of 3\'UTR coverages"
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
63 self.coverage_weights = self.get_coverage_weights()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
64 self.result_tuple = self.get_result_tuple()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
65 self.result_d = self.calculate_apa_ratios()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
66 self.write_results()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
67 if args.breakpoint_bed:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
68 self.bed_output = args.breakpoint_bed
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
69 self.write_bed()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
70
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
71
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
72 def dump_utr_dict_to_bedfile(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
73 w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
74 for gene, utr in self.utr_dict.iteritems():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
75 w.writerow([utr["chr"], utr["new_start"]-1, utr["new_end"], gene, ".", utr["strand"]])
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
76
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
77 def run_bedtools_coverage(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
78 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
79 Use bedtools coverage to generate pileup data for all alignment files for the regions specified in utr_dict.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
80 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
81 coverage_files = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
82 cmds = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
83 for alignment_file in self.all_alignments:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
84 cmd = "sort -k1,1 -k2,2n tmp_bedfile.bed | "
3
aca85eb3eb5b planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 2
diff changeset
85 cmd = cmd + "bedtools coverage -d -s -abam {alignment_file} -b stdin |" \
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
86 " cut -f 4,7,8 > coverage_file_{alignment_name}".format(
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
87 alignment_file = alignment_file, alignment_name= self.alignment_names[alignment_file] )
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
88 cmds.append(cmd)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
89 pool = Pool(self.n_cpus)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
90 subprocesses = [subprocess.Popen([cmd], shell=True) for cmd in cmds]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
91 [p.wait() for p in subprocesses]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
92 coverage_files = ["gene_position_coverage_{alignment_name}".format(
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
93 alignment_name = self.alignment_names[alignment_file]) for alignment_file in self.all_alignments ]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
94 return coverage_files
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
95
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
96 def read_coverage_result(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
97 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
98 Read coverages back in and store as dictionary of numpy arrays
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
99 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
100 coverage_dict = { gene: { name: np.zeros(utr_d["new_end"]+1-utr_d["new_start"]) for name in self.alignment_names.itervalues() } for gene, utr_d in self.utr_dict.iteritems() }
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
101 for alignment_name in self.alignment_names.itervalues():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
102 with open("coverage_file_{alignment_name}".format(alignment_name = alignment_name)) as coverage_file:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
103 for line in coverage_file:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
104 gene, position, coverage= line.strip().split("\t")
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
105 coverage_dict[gene][alignment_name][int(position)-1] = coverage
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
106 for utr_d in self.utr_dict.itervalues():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
107 if utr_d["strand"] == "-":
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
108 for alignment_name in self.alignment_names.values():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
109 coverage_dict[gene][alignment_name] = coverage_dict[gene][alignment_name][::-1]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
110 return coverage_dict
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
111
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
112 def get_utr_dict(self, shift):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
113 utr_dict = OrderedDict()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
114 for line in self.utr:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
115 if not line.startswith("#"):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
116 filter_utr.get_feature_dict( line=line, gtf_fields=self.gtf_fields, utr_dict=utr_dict, feature="UTR" )
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
117 gene, utr_d = utr_dict.popitem()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
118 utr_d = utr_d[0]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
119 end_shift = int(round(abs(utr_d["start"] - utr_d["end"]) * shift))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
120 if utr_d["strand"] == "+":
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
121 utr_d["new_end"] = utr_d["end"] - end_shift
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
122 utr_d["new_start"] = utr_d["start"]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
123 else:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
124 utr_d["new_end"] = utr_d["end"]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
125 utr_d["new_start"] = utr_d["start"] + end_shift
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
126 if utr_d["new_start"] + 50 < utr_d["new_end"]:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
127 utr_dict[gene] = utr_d
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
128 return utr_dict
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
129
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
130 def get_coverage_weights(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
131 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
132 Return weights for normalizing coverage.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
133 utr_coverage is still confusing.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
134 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
135 coverage_per_alignment = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
136 for utr in self.utr_coverages.itervalues(): # TODO: be smarter about this.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
137 utr_coverage = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
138 for vector in utr.itervalues():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
139 utr_coverage.append(np.sum(vector))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
140 coverage_per_alignment.append(utr_coverage)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
141 coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ])
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
142 coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited?
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
143 return coverage_weights
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
144
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
145 def get_result_tuple(self):
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
146 static_desc = ["chr", "start", "end", "strand", "gene", "breakpoint",
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
147 "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
148 samples_desc = []
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
149 for statistic in ["coverage_long", "coverage_short", "percent_long"]:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
150 for i, sample in enumerate(self.control_alignments):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
151 samples_desc.append("control_{i}_{statistic}".format(i=i, statistic = statistic))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
152 for i, sample in enumerate(self.treatment_alignments):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
153 samples_desc.append("treatment_{i}_{statistic}".format(i=i, statistic = statistic))
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
154 return namedtuple("result", static_desc + samples_desc)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
155
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
156 def calculate_apa_ratios(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
157 result_d = OrderedDict()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
158 arg_d = {"result_tuple": self.result_tuple,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
159 "coverage_weights":self.coverage_weights,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
160 "num_samples":self.num_samples,
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
161 "num_control":len(self.control_alignments),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
162 "num_treatment":len(self.treatment_alignments),
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
163 "result_d":result_d}
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
164 pool = Pool(self.n_cpus)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
165 tasks = [ (self.utr_coverages[utr], utr, utr_d, self.result_tuple._fields, self.coverage_weights, self.num_samples,
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
166 len(self.control_alignments), len(self.treatment_alignments), self.search_start,
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
167 self.coverage_threshold) for utr, utr_d in self.utr_dict.iteritems() ]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
168 processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
169 result = [res.get() for res in processed_tasks]
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
170 for res_control, res_treatment in result:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
171 if isinstance(res_control, dict):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
172 t = self.result_tuple(**res_control)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
173 result_d[res_control["gene"]+"_bp_control"] = t
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
174 if isinstance(res_treatment, dict):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
175 t = self.result_tuple(**res_treatment)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
176 result_d[res_treatment["gene"]+"_bp_treatment"] = t
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
177 return result_d
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
178
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
179 def write_results(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
180 w = csv.writer(self.result_file, delimiter='\t')
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
181 header = list(self.result_tuple._fields)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
182 header[0] = "#chr"
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
183 w.writerow(header) # field header
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
184 w.writerows( self.result_d.values())
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
185
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
186 def write_bed(self):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
187 w = csv.writer(self.bed_output, delimiter='\t')
4
73b932244237 planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents: 3
diff changeset
188 bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
189 w.writerows(bed)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
190
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
191
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
192 def calculate_all_utr(utr_coverage, utr, utr_d, result_tuple_fields, coverage_weights, num_samples, num_control,
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
193 num_treatment, search_start, coverage_threshold):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
194 res_control = dict(zip(result_tuple_fields, result_tuple_fields))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
195 res_treatment = res_control.copy()
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
196 if utr_d["strand"] == "+":
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
197 is_reverse = False
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
198 else:
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
199 is_reverse = True
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
200 control_breakpoint, \
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
201 control_abundance, \
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
202 treatment_breakpoint, \
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
203 treatment_abundance = optimize_breakpoint(utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights,
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
204 search_start, coverage_threshold, num_control)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
205 if control_breakpoint:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
206 breakpoint_to_result(res_control, utr, utr_d, control_breakpoint, "control_breakpoint", control_abundance, is_reverse, num_samples,
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
207 num_control, num_treatment)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
208 if treatment_breakpoint:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
209 breakpoint_to_result(res_treatment, utr, utr_d, treatment_breakpoint, "treatment_breakpoint", treatment_abundance, is_reverse,
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
210 num_samples, num_control, num_treatment)
4
73b932244237 planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents: 3
diff changeset
211 if res_control == dict(zip(result_tuple_fields, result_tuple_fields)):
73b932244237 planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents: 3
diff changeset
212 res_control = False
73b932244237 planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents: 3
diff changeset
213 if res_treatment == dict(zip(result_tuple_fields, result_tuple_fields)):
73b932244237 planemo upload for repository https://github.com/mvdbeek/dapars commit c2344ee1b8c6371007b7484b02d17fb056eb2427-dirty
mvdbeek
parents: 3
diff changeset
214 res_treatment == False
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
215 return res_control, res_treatment
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
216
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
217
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
218 def breakpoint_to_result(res, utr, utr_d, breakpoint, breakpoint_type,
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
219 abundances, is_reverse, num_samples, num_control, num_treatment):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
220 """
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
221 Takes in a result dictionary res and fills the necessary fields
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
222 """
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
223 long_coverage_vector = abundances[0]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
224 short_coverage_vector = abundances[1]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
225 num_non_zero = sum((np.array(long_coverage_vector) + np.array(short_coverage_vector)) > 0) # TODO: This introduces bias
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
226 if num_non_zero == num_samples:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
227 percentage_long = []
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
228 for i in range(num_samples):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
229 ratio = float(long_coverage_vector[i]) / (long_coverage_vector[i] + short_coverage_vector[i]) # long 3'UTR percentage
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
230 percentage_long.append(ratio)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
231 for i in range(num_control):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
232 res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i])
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
233 res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i])
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
234 res["control_{i}_percent_long".format(i=i)] = percentage_long[i]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
235 for k in range(num_treatment):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
236 i = k + num_control
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
237 res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i])
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
238 res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i])
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
239 res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
240 control_mean_percent = np.mean(np.array(percentage_long[:num_control]))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
241 treatment_mean_percent = np.mean(np.array(percentage_long[num_control:]))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
242 res["chr"] = utr_d["chr"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
243 res["start"] = utr_d["start"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
244 res["end"] = utr_d["end"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
245 res["strand"] = utr_d["strand"]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
246 if is_reverse:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
247 breakpoint = utr_d["new_end"] - breakpoint
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
248 else:
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
249 breakpoint = utr_d["new_start"] + breakpoint
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
250 res["breakpoint"] = breakpoint
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
251 res["breakpoint_type"] = breakpoint_type
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
252 res["control_mean_percent"] = control_mean_percent
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
253 res["treatment_mean_percent"] = treatment_mean_percent
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
254 res["gene"] = utr
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
255
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
256
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
257 def optimize_breakpoint(utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
258 """
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
259 We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
260 at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample.
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
261 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
262 search_point_end = int(abs((UTR_end - UTR_start)) * 0.1) # TODO: This is 10% of total UTR end. Why?
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
263 num_samples = len(utr_coverage)
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
264 normalized_utr_coverage = np.array([coverage/ coverage_weigths[i] for i, coverage in enumerate( utr_coverage.values() )])
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
265 start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()] # filters threshold on mean coverage over first 100 nt
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
266 is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why?
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
267 is_above_length = UTR_end - UTR_start >= 150
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
268 if (is_above_threshold) and (is_above_length):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
269 search_end = UTR_end - UTR_start - search_point_end
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
270 breakpoints = range(search_start, search_end + 1)
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
271 mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ]
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
272 if len(mse_list) > 0:
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
273 return mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
274 return False, False, False, False
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
275
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
276
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
277 def mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples):
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
278 """
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
279 Take in mse_list with control and treatment mse and return breakpoint and utr abundance
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
280 """
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
281 mse_control = [mse[0] for mse in mse_list]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
282 mse_treatment = [mse[1] for mse in mse_list]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
283 control_index = mse_control.index(min(mse_control))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
284 treatment_index = mse_treatment.index(min(mse_treatment))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
285 control_breakpoint = breakpoints[control_index]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
286 treatment_breakpoint = breakpoints[treatment_index]
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
287 control_abundance = estimate_abundance(normalized_utr_coverage, control_breakpoint, num_samples)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
288 treatment_abundance = estimate_abundance(normalized_utr_coverage, treatment_breakpoint, num_samples)
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
289 return control_breakpoint, control_abundance, treatment_breakpoint, treatment_abundance
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
290
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
291
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
292 def estimate_mse(cov, bp, num_samples, num_control):
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
293 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
294 get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr.
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
295 """
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
296 with warnings.catch_warnings():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
297 warnings.simplefilter("ignore", category=RuntimeWarning)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
298 long_utr_vector = cov[:num_samples, bp:]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
299 short_utr_vector = cov[:num_samples, 0:bp]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
300 mean_long_utr = np.mean(long_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
301 mean_short_utr = np.mean(short_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
302 square_mean_centered_short_utr_vector = (short_utr_vector[:num_samples] - mean_short_utr[:, np.newaxis] )**2
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
303 square_mean_centered_long_utr_vector = (long_utr_vector[:num_samples] - mean_long_utr[:, np.newaxis])**2
1
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
304 mse_control = np.mean(np.append(square_mean_centered_long_utr_vector[:num_control], square_mean_centered_short_utr_vector[:num_control]))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
305 mse_treatment = np.mean(np.append(square_mean_centered_long_utr_vector[num_control:], square_mean_centered_short_utr_vector[num_control:]))
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
306 return mse_control, mse_treatment
1b20ba32b4c5 planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents: 0
diff changeset
307
0
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
308
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
309 def estimate_abundance(cov, bp, num_samples):
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
310 with warnings.catch_warnings():
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
311 warnings.simplefilter("ignore", category=RuntimeWarning)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
312 long_utr_vector = cov[:num_samples, bp:]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
313 short_utr_vector = cov[:num_samples, 0:bp]
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
314 mean_long_utr = np.mean(long_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
315 mean_short_utr = np.mean(short_utr_vector, 1)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
316 return mean_long_utr, mean_short_utr
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
317
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
318
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
319 if __name__ == '__main__':
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
320 args = parse_args()
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
321 find_utr = UtrFinder(args)
bb84ee2f2137 planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff changeset
322