0
|
1 #!/usr/bin/env python3
|
|
2 import os
|
|
3 import hashlib
|
|
4
|
|
5 from itertools import chain
|
|
6
|
|
7
|
|
8
|
|
def md5checksum(filename, fail_if_missing=True):
    '''
    Return the hexadecimal MD5 digest of the file ``filename``.

    The file is read in binary mode in 4096-byte pieces. When the file does
    not exist, FileNotFoundError is propagated if ``fail_if_missing`` is true;
    otherwise a human-readable "not calculated" message is returned in place
    of the digest.
    '''
    digest = hashlib.md5()
    try:
        with open(filename, "rb") as handle:
            while True:
                block = handle.read(4096)
                if block == b"":
                    # an empty read marks end of file
                    break
                digest.update(block)
    except FileNotFoundError:
        if fail_if_missing:
            raise
        return "Not calculated!!!! File {} is missing".format(filename)

    return digest.hexdigest()
|
|
22
|
|
23
|
|
class FilePath(str):
    '''
    String subclass that marks its value as being a path to a file.

    Instances behave like ordinary strings; the only addition is the
    ``filepath`` attribute, which is always True, and the ``relative`` helper.
    '''

    def __new__(cls, string):
        instance = super().__new__(cls, string)
        # marker attribute other code can check to recognise file paths
        instance.filepath = True
        return instance

    def relative(self, start):
        ''' return this path expressed relative to the directory ``start`` '''
        relative_path = os.path.relpath(self, start)
        return relative_path
|
|
37
|
|
38
|
|
def save_as_table(d, path, header=None, relative=True):
    ''' takes list of dictionaries and saves it as a tab-separated table

    d        : iterable of dictionaries, one per output row
    path     : file to write; it is overwritten if it exists
    header   : column names in the order they should be written; when it is
               not given, the columns are the union of the keys of all rows,
               in order of first appearance
    relative : when true, FilePath values are written relative to the
               directory containing ``path``

    String values (including FilePath) are wrapped in double quotes, other
    values are written with str(). A row that has no value for a column gets
    an empty field in that column.
    '''
    # materialise once: the rows are traversed for the header and for writing
    rows = list(d)
    pathdir = os.path.dirname(path)
    if not header:
        # dict.fromkeys keeps first-seen order, so the column order is
        # reproducible between runs (a plain set is not)
        header = list(dict.fromkeys(
            chain.from_iterable(row.keys() for row in rows)))
        print("header: ---------", header)
    else:
        header = list(header)

    with open(path, 'w') as f:
        f.write("\t".join(header))
        f.write("\n")
        for row in rows:
            cells = []
            for key in header:
                if key not in row:
                    # rows may lack some columns of the combined header
                    cells.append("")
                    continue
                value = row[key]
                if isinstance(value, FilePath):
                    shown = value.relative(pathdir) if relative else value
                    cells.append('"' + str(shown) + '"')
                elif isinstance(value, str):
                    cells.append('"' + value + '"')
                else:
                    cells.append(str(value))
            f.write("\t".join(cells))
            f.write("\n")
|
|
68
|
|
69
|
|
def export_tandem_consensus(clusters_info, path, rank=1, n=1):
    ''' write tandem repeat consensus sequences to a fasta file

    Every item of ``clusters_info`` that has a non-empty ``TR_consensus`` and
    whose ``tandem_rank`` equals ``rank`` is written as one record; the
    sequence is the consensus (with any '<pre>' removed) repeated ``n`` times.
    The file at ``path`` is created even when nothing matches.

    Returns ``path`` when at least one record was written, otherwise None.
    '''
    template = ">CL{index}_TR_{n}_x_{L}nt\n{sequence}\n"
    print("exporting fasta files")
    print(clusters_info)
    record = None
    with open(path, 'w') as fasta:
        for cluster in clusters_info:
            print(cluster)
            print(dir(cluster))
            if not (cluster.TR_consensus and rank == cluster.tandem_rank):
                continue
            fields = {
                "index": cluster.index,
                "n": n,
                "L": cluster.TR_monomer_length,
                "sequence": n * cluster.TR_consensus.replace('<pre>', ''),
            }
            record = template.format(**fields)
            fasta.write(record)
    # record stays None unless at least one cluster was exported
    return path if record else None
|
|
90
|
|
91
|
|
def file_len(filename):
    '''count number of lines in file

    The file is opened in text mode and streamed line by line, so it is never
    loaded into memory as a whole. An empty file has 0 lines; a last line
    without a trailing newline is still counted.
    '''
    with open(filename) as f:
        # count iterations only; the line content itself is not needed
        return sum(1 for _ in f)
|
|
99
|
|
def go2line(f, L):
    ''' find line L in file object f

    Rewinds ``f`` and positions it so that the next read returns line ``L``,
    counting lines from 1. ``L`` of 0 or 1 (or less) leaves the file at its
    beginning. When the file has fewer than ``L`` lines, it is left at end of
    file.

    Lines are skipped with readline() instead of computing an offset from the
    line lengths: in text mode the length of a line in characters is not its
    length in bytes (multi-byte encodings, translated newlines), so such an
    offset cannot be passed safely to seek().
    '''
    f.seek(0)
    current = 1
    while current < L:
        if not f.readline():
            # reached end of file before line L
            break
        current += 1
|
|
114
|
|
def format_query(x):
    '''
    build a parenthesised, double-quoted, comma-separated list for a query,
    e.g. [1, 2] -> ("1","2"); every item is converted with str()
    '''
    items = [str(item) for item in x]
    joined = '","'.join(items)
    return '("' + joined + '")'
|