annotate lib/utils.py @ 0:f6ebec6e235e draft

Uploaded
author petrn
date Thu, 19 Dec 2019 13:46:43 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1 #!/usr/bin/env python3
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
2 import os
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
3 import hashlib
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
4
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
5 from itertools import chain
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
6
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
7
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
8
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
def md5checksum(filename, fail_if_missing=True):
    """Return the hexadecimal MD5 digest of the file *filename*.

    The file is opened in binary mode and hashed in 4096-byte chunks.

    If the file does not exist, ``FileNotFoundError`` is re-raised when
    *fail_if_missing* is true; otherwise a placeholder message naming the
    missing file is returned in place of a digest.
    """
    digest = hashlib.md5()
    try:
        with open(filename, "rb") as handle:
            while True:
                block = handle.read(4096)
                if block == b"":
                    # an empty read marks end of file
                    break
                digest.update(block)
    except FileNotFoundError:
        if fail_if_missing:
            raise
        return "Not calculated!!!! File {} is missing".format(filename)
    return digest.hexdigest()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
22
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
23
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
class FilePath(str):
    """A ``str`` subclass that marks its value as a path to a file.

    Instances behave like ordinary strings and additionally carry a
    ``filepath`` attribute, which is always set to ``True``.
    """

    def __new__(cls, string):
        instance = super().__new__(cls, string)
        instance.filepath = True
        return instance

    def relative(self, start):
        """Return this path expressed relative to the directory *start*."""
        return os.path.relpath(self, start=start)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
37
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
38
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
def save_as_table(d, path, header=None, relative=True):
    """Write a list of dictionaries to *path* as a tab-separated table.

    The first line of the file holds the column names; every following line
    holds one dictionary from *d*, with its values in column order.

    Parameters:
        d: iterable of dictionaries, one per output row.
        path: name of the file to write (created or overwritten).
        header: column names in the desired order.  When omitted or empty,
            the columns are the union of all keys found in *d*, in the
            order in which they are first encountered.
        relative: when true, ``FilePath`` values are written relative to
            the directory containing *path*; otherwise they are written
            unchanged.

    ``FilePath`` and other ``str`` values are wrapped in double quotes; any
    other value is written as ``str(value)``.  A row that lacks one of the
    columns gets an empty field there.  Values are not escaped, so embedded
    tabs, newlines or double quotes are written verbatim.
    """
    pathdir = os.path.dirname(path)
    if not header:
        # dict.fromkeys de-duplicates while keeping first-seen order (on
        # interpreters with ordered dicts), so the column layout is
        # reproducible between runs, unlike iteration over a set of strings.
        header = list(dict.fromkeys(chain.from_iterable(d)))
        print("header: ---------", header)
    else:
        # Materialise once: the header is walked again for every row, which
        # would silently yield nothing if a one-shot iterator was passed.
        header = list(header)
    with open(path, 'w') as f:
        f.write("\t".join(header))
        f.write("\n")
        for row in d:
            fields = []
            for key in header:
                if key not in row:
                    # The derived header is the union of all keys, so a
                    # row may legitimately miss some of them.
                    fields.append("")
                    continue
                value = row[key]
                if isinstance(value, FilePath):
                    shown = value.relative(pathdir) if relative else value
                    fields.append('"' + str(shown) + '"')
                elif isinstance(value, str):
                    fields.append('"' + value + '"')
                else:
                    fields.append(str(value))
            f.write("\t".join(fields))
            f.write("\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
68
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
69
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
def export_tandem_consensus(clusters_info, path, rank=1, n=1):
    """Write tandem-repeat consensus sequences to *path* in FASTA format.

    One record is written for every item of *clusters_info* whose
    ``TR_consensus`` is truthy and whose ``tandem_rank`` equals *rank*.
    The record name is built from the item's ``index``, *n* and
    ``TR_monomer_length``; the sequence is ``TR_consensus`` with every
    ``<pre>`` removed, repeated *n* times.

    The file is created (or truncated) even when no record qualifies.
    Diagnostic output about the clusters is printed to stdout.

    Returns *path* if at least one record was written, otherwise ``None``.
    """
    print("exporting fasta files")
    print(clusters_info)
    record = ">CL{index}_TR_{n}_x_{L}nt\n{sequence}\n"
    wrote_any = False
    with open(path, 'w') as fasta:
        for cluster in clusters_info:
            print(cluster)
            print(dir(cluster))
            if not (cluster.TR_consensus and rank == cluster.tandem_rank):
                continue
            fasta.write(
                record.format(
                    index=cluster.index,
                    n=n,
                    L=cluster.TR_monomer_length,
                    sequence=n * cluster.TR_consensus.replace('<pre>', ''),
                )
            )
            wrote_any = True
    return path if wrote_any else None
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
90
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
91
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
def file_len(filename):
    """Return the number of lines in the text file *filename*.

    An empty file yields 0.  A final line without a trailing newline is
    still counted as a line.
    """
    with open(filename) as f:
        # Count by iterating; the line contents themselves are not needed.
        return sum(1 for _ in f)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
99
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
def go2line(f, L):
    """Position the file object *f* at the beginning of line *L*.

    Lines are numbered from 1, so the next read after the call returns
    line *L*.  ``L == 0`` is treated like ``L == 1`` and leaves the file
    at its start.  If the file has fewer than *L* lines, *f* is left at
    end of file.

    Parameters:
        f: a seekable file object opened for reading (text or binary).
        L: line number to move to.

    Returns ``None``; only the position of *f* is changed.
    """
    f.seek(0)
    # Skip the L - 1 preceding lines with readline(), which leaves the file
    # positioned exactly at the start of line L.  Seeking to an offset
    # computed from len(line) is not reliable for text files: character
    # counts differ from byte offsets with multi-byte encodings or
    # translated newlines.
    for _ in range(L - 1):
        if not f.readline():
            # ran out of lines before reaching L
            break
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
114
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
def format_query(x):
    """Render the items of *x* as a parenthesised, double-quoted list.

    Every item is converted with ``str`` and the result has the form
    ``("a","b","c")``.  An empty *x* gives ``("")``.  The items are not
    escaped in any way.
    """
    quoted_items = '","'.join(str(item) for item in x)
    return '("{}")'.format(quoted_items)