Mercurial > repos > mvdbeek > dapars
annotate dapars.py @ 3:aca85eb3eb5b draft
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
author | mvdbeek |
---|---|
date | Wed, 28 Oct 2015 06:05:21 -0400 |
parents | 47b0044bc7f8 |
children | 73b932244237 |
rev | line source |
---|---|
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
1 import argparse |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
2 import os |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
3 import csv |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
4 import numpy as np |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
5 from collections import OrderedDict, namedtuple |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
6 import filter_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
7 import subprocess |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
8 from multiprocessing import Pool |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
9 import warnings |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
10 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
11 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
12 def parse_args(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
13 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
14 Returns floating point values except for input files. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
15 My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
16 :param argv: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
17 :return: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
18 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
19 parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
20 parser.add_argument("-c", "--control_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
21 help="Alignment files in BAM format from control condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
22 parser.add_argument("-t", "--treatment_alignments", nargs="+", required=True, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
23 help="Alignment files in BAM format from treatment condition") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
24 parser.add_argument("-u", "--utr_bed_file", required=True, type=file, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
25 help="Bed file describing longest 3UTR positions") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
26 parser.add_argument("-o", "--output_file", required=True, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
27 help="file containing output") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
28 parser.add_argument("-cpu", required=False, type=int, default=1, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
29 help="Number of CPU cores to use.") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
30 parser.add_argument("-s", "--search_start", required=False, type=int, default=50, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
31 help="Start search for breakpoint n nucleotides downstream of UTR start") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
32 parser.add_argument("-ct", "--coverage_threshold", required=False, type=float, default=20, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
33 help="minimum coverage in each aligment to be considered for determining breakpoints") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
34 parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
35 help="Write bedfile with coordinates of breakpoint positions to supplied path.") |
2
47b0044bc7f8
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
1
diff
changeset
|
36 parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.1.5') |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
37 return parser.parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
38 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
39 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
40 class UtrFinder(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
41 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
42 This seems to be the main caller. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
43 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
44 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
45 def __init__(self, args): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
46 self.control_alignments = [file for file in args.control_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
47 self.treatment_alignments = [file for file in args.treatment_alignments] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
48 self.n_cpus = args.cpu |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
49 self.search_start = args.search_start |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
50 self.coverage_threshold = args.coverage_threshold |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
51 self.utr = args.utr_bed_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
52 self.gtf_fields = filter_utr.get_gtf_fields() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
53 self.result_file = args.output_file |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
54 self.all_alignments = self.control_alignments + self.treatment_alignments |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
55 self.alignment_names = { file: os.path.basename(file) for file in self.all_alignments } |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
56 self.num_samples = len(self.all_alignments) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
57 self.utr_dict = self.get_utr_dict(0.2) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
58 self.dump_utr_dict_to_bedfile() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
59 print "Established dictionary of 3\'UTRs" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
60 self.coverage_files = self.run_bedtools_coverage() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
61 self.utr_coverages = self.read_coverage_result() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
62 print "Established dictionary of 3\'UTR coverages" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
63 self.coverage_weights = self.get_coverage_weights() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
64 self.result_tuple = self.get_result_tuple() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
65 self.result_d = self.calculate_apa_ratios() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
66 self.write_results() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
67 if args.breakpoint_bed: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
68 self.bed_output = args.breakpoint_bed |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
69 self.write_bed() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
70 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
71 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
72 def dump_utr_dict_to_bedfile(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
73 w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
74 for gene, utr in self.utr_dict.iteritems(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
75 w.writerow([utr["chr"], utr["new_start"]-1, utr["new_end"], gene, ".", utr["strand"]]) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
76 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
77 def run_bedtools_coverage(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
78 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
79 Use bedtools coverage to generate pileup data for all alignment files for the regions specified in utr_dict. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
80 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
81 coverage_files = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
82 cmds = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
83 for alignment_file in self.all_alignments: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
84 cmd = "sort -k1,1 -k2,2n tmp_bedfile.bed | " |
3
aca85eb3eb5b
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
2
diff
changeset
|
85 cmd = cmd + "bedtools coverage -d -s -abam {alignment_file} -b stdin |" \ |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
86 " cut -f 4,7,8 > coverage_file_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
87 alignment_file = alignment_file, alignment_name= self.alignment_names[alignment_file] ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
88 cmds.append(cmd) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
89 pool = Pool(self.n_cpus) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
90 subprocesses = [subprocess.Popen([cmd], shell=True) for cmd in cmds] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
91 [p.wait() for p in subprocesses] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
92 coverage_files = ["gene_position_coverage_{alignment_name}".format( |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
93 alignment_name = self.alignment_names[alignment_file]) for alignment_file in self.all_alignments ] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
94 return coverage_files |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
95 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
96 def read_coverage_result(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
97 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
98 Read coverages back in and store as dictionary of numpy arrays |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
99 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
100 coverage_dict = { gene: { name: np.zeros(utr_d["new_end"]+1-utr_d["new_start"]) for name in self.alignment_names.itervalues() } for gene, utr_d in self.utr_dict.iteritems() } |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
101 for alignment_name in self.alignment_names.itervalues(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
102 with open("coverage_file_{alignment_name}".format(alignment_name = alignment_name)) as coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
103 for line in coverage_file: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
104 gene, position, coverage= line.strip().split("\t") |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
105 coverage_dict[gene][alignment_name][int(position)-1] = coverage |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
106 for utr_d in self.utr_dict.itervalues(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
107 if utr_d["strand"] == "-": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
108 for alignment_name in self.alignment_names.values(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
109 coverage_dict[gene][alignment_name] = coverage_dict[gene][alignment_name][::-1] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
110 return coverage_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
111 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
112 def get_utr_dict(self, shift): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
113 utr_dict = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
114 for line in self.utr: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
115 if not line.startswith("#"): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
116 filter_utr.get_feature_dict( line=line, gtf_fields=self.gtf_fields, utr_dict=utr_dict, feature="UTR" ) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
117 gene, utr_d = utr_dict.popitem() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
118 utr_d = utr_d[0] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
119 end_shift = int(round(abs(utr_d["start"] - utr_d["end"]) * shift)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
120 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
121 utr_d["new_end"] = utr_d["end"] - end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
122 utr_d["new_start"] = utr_d["start"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
123 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
124 utr_d["new_end"] = utr_d["end"] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
125 utr_d["new_start"] = utr_d["start"] + end_shift |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
126 if utr_d["new_start"] + 50 < utr_d["new_end"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
127 utr_dict[gene] = utr_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
128 return utr_dict |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
129 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
130 def get_coverage_weights(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
131 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
132 Return weights for normalizing coverage. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
133 utr_coverage is still confusing. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
134 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
135 coverage_per_alignment = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
136 for utr in self.utr_coverages.itervalues(): # TODO: be smarter about this. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
137 utr_coverage = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
138 for vector in utr.itervalues(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
139 utr_coverage.append(np.sum(vector)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
140 coverage_per_alignment.append(utr_coverage) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
141 coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ]) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
142 coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
143 return coverage_weights |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
144 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
145 def get_result_tuple(self): |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
146 static_desc = ["chr", "start", "end", "strand", "gene", "breakpoint", |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
147 "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
148 samples_desc = [] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
149 for statistic in ["coverage_long", "coverage_short", "percent_long"]: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
150 for i, sample in enumerate(self.control_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
151 samples_desc.append("control_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
152 for i, sample in enumerate(self.treatment_alignments): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
153 samples_desc.append("treatment_{i}_{statistic}".format(i=i, statistic = statistic)) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
154 return namedtuple("result", static_desc + samples_desc) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
155 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
156 def calculate_apa_ratios(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
157 result_d = OrderedDict() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
158 arg_d = {"result_tuple": self.result_tuple, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
159 "coverage_weights":self.coverage_weights, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
160 "num_samples":self.num_samples, |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
161 "num_control":len(self.control_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
162 "num_treatment":len(self.treatment_alignments), |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
163 "result_d":result_d} |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
164 pool = Pool(self.n_cpus) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
165 tasks = [ (self.utr_coverages[utr], utr, utr_d, self.result_tuple._fields, self.coverage_weights, self.num_samples, |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
166 len(self.control_alignments), len(self.treatment_alignments), self.search_start, |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
167 self.coverage_threshold) for utr, utr_d in self.utr_dict.iteritems() ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
168 processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
169 result = [res.get() for res in processed_tasks] |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
170 for res_control, res_treatment in result: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
171 if isinstance(res_control, dict): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
172 t = self.result_tuple(**res_control) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
173 result_d[res_control["gene"]+"_bp_control"] = t |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
174 if isinstance(res_treatment, dict): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
175 t = self.result_tuple(**res_treatment) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
176 result_d[res_treatment["gene"]+"_bp_treatment"] = t |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
177 return result_d |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
178 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
179 def write_results(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
180 w = csv.writer(self.result_file, delimiter='\t') |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
181 header = list(self.result_tuple._fields) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
182 header[0] = "#chr" |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
183 w.writerow(header) # field header |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
184 w.writerows( self.result_d.values()) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
185 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
186 def write_bed(self): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
187 w = csv.writer(self.bed_output, delimiter='\t') |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
188 bed = [(result.chr, result.breakpoint, result.breakpoint+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
189 w.writerows(bed) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
190 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
191 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
192 def calculate_all_utr(utr_coverage, utr, utr_d, result_tuple_fields, coverage_weights, num_samples, num_control, |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
193 num_treatment, search_start, coverage_threshold): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
194 res_control = dict(zip(result_tuple_fields, result_tuple_fields)) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
195 res_treatment = res_control.copy() |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
196 if utr_d["strand"] == "+": |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
197 is_reverse = False |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
198 else: |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
199 is_reverse = True |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
200 control_breakpoint, \ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
201 control_abundance, \ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
202 treatment_breakpoint, \ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
203 treatment_abundance = optimize_breakpoint(utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
204 search_start, coverage_threshold, num_control) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
205 if control_breakpoint: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
206 breakpoint_to_result(res_control, utr, utr_d, control_breakpoint, "control_breakpoint", control_abundance, is_reverse, num_samples, |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
207 num_control, num_treatment) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
208 if treatment_breakpoint: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
209 breakpoint_to_result(res_treatment, utr, utr_d, treatment_breakpoint, "treatment_breakpoint", treatment_abundance, is_reverse, |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
210 num_samples, num_control, num_treatment) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
211 return res_control, res_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
212 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
213 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
214 def breakpoint_to_result(res, utr, utr_d, breakpoint, breakpoint_type, |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
215 abundances, is_reverse, num_samples, num_control, num_treatment): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
216 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
217 Takes in a result dictionary res and fills the necessary fields |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
218 """ |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
219 long_coverage_vector = abundances[0] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
220 short_coverage_vector = abundances[1] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
221 num_non_zero = sum((np.array(long_coverage_vector) + np.array(short_coverage_vector)) > 0) # TODO: This introduces bias |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
222 if num_non_zero == num_samples: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
223 percentage_long = [] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
224 for i in range(num_samples): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
225 ratio = float(long_coverage_vector[i]) / (long_coverage_vector[i] + short_coverage_vector[i]) # long 3'UTR percentage |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
226 percentage_long.append(ratio) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
227 for i in range(num_control): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
228 res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
229 res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
230 res["control_{i}_percent_long".format(i=i)] = percentage_long[i] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
231 for k in range(num_treatment): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
232 i = k + num_control |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
233 res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
234 res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i]) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
235 res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
236 control_mean_percent = np.mean(np.array(percentage_long[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
237 treatment_mean_percent = np.mean(np.array(percentage_long[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
238 res["chr"] = utr_d["chr"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
239 res["start"] = utr_d["start"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
240 res["end"] = utr_d["end"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
241 res["strand"] = utr_d["strand"] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
242 if is_reverse: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
243 breakpoint = utr_d["new_end"] - breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
244 else: |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
245 breakpoint = utr_d["new_start"] + breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
246 res["breakpoint"] = breakpoint |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
247 res["breakpoint_type"] = breakpoint_type |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
248 res["control_mean_percent"] = control_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
249 res["treatment_mean_percent"] = treatment_mean_percent |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
250 res["gene"] = utr |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
251 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
252 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
253 def optimize_breakpoint(utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
254 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
255 We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
256 at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample. |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
257 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
258 search_point_end = int(abs((UTR_end - UTR_start)) * 0.1) # TODO: This is 10% of total UTR end. Why? |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
259 num_samples = len(utr_coverage) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
260 normalized_utr_coverage = np.array([coverage/ coverage_weigths[i] for i, coverage in enumerate( utr_coverage.values() )]) |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
261 start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()] # filters threshold on mean coverage over first 100 nt |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
262 is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why? |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
263 is_above_length = UTR_end - UTR_start >= 150 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
264 if (is_above_threshold) and (is_above_length): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
265 search_end = UTR_end - UTR_start - search_point_end |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
266 breakpoints = range(search_start, search_end + 1) |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
267 mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ] |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
268 if len(mse_list) > 0: |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
269 return mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
270 return False, False, False, False |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
271 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
272 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
273 def mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples): |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
274 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
275 Take in mse_list with control and treatment mse and return breakpoint and utr abundance |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
276 """ |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
277 mse_control = [mse[0] for mse in mse_list] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
278 mse_treatment = [mse[1] for mse in mse_list] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
279 control_index = mse_control.index(min(mse_control)) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
280 treatment_index = mse_treatment.index(min(mse_treatment)) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
281 control_breakpoint = breakpoints[control_index] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
282 treatment_breakpoint = breakpoints[treatment_index] |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
283 control_abundance = estimate_abundance(normalized_utr_coverage, control_breakpoint, num_samples) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
284 treatment_abundance = estimate_abundance(normalized_utr_coverage, treatment_breakpoint, num_samples) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
285 return control_breakpoint, control_abundance, treatment_breakpoint, treatment_abundance |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
286 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
287 |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
288 def estimate_mse(cov, bp, num_samples, num_control): |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
289 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
290 get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr. |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
291 """ |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
292 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
293 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
294 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
295 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
296 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
297 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
298 square_mean_centered_short_utr_vector = (short_utr_vector[:num_samples] - mean_short_utr[:, np.newaxis] )**2 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
299 square_mean_centered_long_utr_vector = (long_utr_vector[:num_samples] - mean_long_utr[:, np.newaxis])**2 |
1
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
300 mse_control = np.mean(np.append(square_mean_centered_long_utr_vector[:num_control], square_mean_centered_short_utr_vector[:num_control])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
301 mse_treatment = np.mean(np.append(square_mean_centered_long_utr_vector[num_control:], square_mean_centered_short_utr_vector[num_control:])) |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
302 return mse_control, mse_treatment |
1b20ba32b4c5
planemo upload for repository https://github.com/mvdbeek/dapars commit a02ced0b40d8946aa78a58321bd8f9771148391e-dirty
mvdbeek
parents:
0
diff
changeset
|
303 |
0
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
304 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
305 def estimate_abundance(cov, bp, num_samples): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
306 with warnings.catch_warnings(): |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
307 warnings.simplefilter("ignore", category=RuntimeWarning) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
308 long_utr_vector = cov[:num_samples, bp:] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
309 short_utr_vector = cov[:num_samples, 0:bp] |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
310 mean_long_utr = np.mean(long_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
311 mean_short_utr = np.mean(short_utr_vector, 1) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
312 return mean_long_utr, mean_short_utr |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
313 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
314 |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
315 if __name__ == '__main__': |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
316 args = parse_args() |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
317 find_utr = UtrFinder(args) |
bb84ee2f2137
planemo upload for repository https://github.com/mvdbeek/dapars commit 868f8f2f7ac5d70c39b7d725ff087833b0f24f52-dirty
mvdbeek
parents:
diff
changeset
|
318 |