diff lib/utils.py @ 0:f6ebec6e235e draft

Uploaded
author petrn
date Thu, 19 Dec 2019 13:46:43 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/utils.py	Thu Dec 19 13:46:43 2019 +0000
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+import os
+import hashlib
+
+from itertools import chain
+
+
+
+def md5checksum(filename, fail_if_missing=True):
+    try:
+        md5 = hashlib.md5()
+        with open(filename, "rb") as f:
+            for i in iter(lambda: f.read(4096), b""):
+                md5.update(i)
+    except FileNotFoundError as e:
+        if not fail_if_missing:
+            return "Not calculated!!!!  File {} is missing".format(filename)
+        else:
+            raise e
+
+    return md5.hexdigest()
+
+
+class FilePath(str):
+    '''
+    Extension of str - it just contain additional atribute showing that the string is alsp path to file
+    '''
+
+    def __new__(cls, string):
+        obj = super(FilePath, cls).__new__(cls, string)
+        obj.filepath = True
+        return obj
+
+    def relative(self, start):
+        ''' return path relative to start'''
+        return os.path.relpath(self, start)
+
+
+def save_as_table(d, path, header=None, relative=True):
+    ''' takes list of dictionaries and save csv file
+    define header if you want to use specific order!
+    '''
+    pathdir = os.path.dirname(path)
+    if not header:
+        
+        all_keys = [i.keys() for i in d]
+        header = set(chain(*all_keys))
+        print("header: ---------", header)
+    with open(path, 'w') as f:
+        f.write("\t".join(header))
+        f.write("\n")
+        for i in d:
+            istr = []
+            for key in header:
+                if isinstance(i[key], FilePath):
+                    if relative:
+                        istr.append('"' + str(i[key].relative(pathdir)) + '"')
+                    else:
+                        istr.append('"' + str(i[key]) + '"')
+                else:
+                    if isinstance(i[key], str):
+                        istr.append('"' + str(i[key] + '"'))
+                    else:
+                        istr.append(str(i[key]))
+
+            f.write("\t".join(istr))
+            f.write("\n")
+
+
+def export_tandem_consensus(clusters_info, path, rank=1, n=1):
+    ''' export tr consensu to file'''
+    print("exporting fasta files")
+    print(clusters_info)
+    s = None
+    with open(path, 'w') as f:
+        for cl in clusters_info:
+            print(cl)
+            print(dir(cl))
+            if cl.TR_consensus and rank == cl.tandem_rank:
+                s = ">CL{index}_TR_{n}_x_{L}nt\n{sequence}\n".format(
+                    index=cl.index,
+                    n=n,
+                    L=cl.TR_monomer_length,
+                    sequence=n * cl.TR_consensus.replace('<pre>', ''))
+                f.write(s)
+    if s:
+        return path
+    else:
+        return None
+
+
+def file_len(filename):
+    '''count number of lines in file'''
+    with open(filename) as f:
+        i = 0
+        for i in f:
+            i += i
+    return i
+
+def go2line(f, L):
+    ''' find line L in file object f '''
+    f.seek(0)
+    if L == 0:
+        return
+    i = 0
+    pos = f.tell()
+    for line in f:
+        i += 1
+        if i == L:
+            f.seek(pos)
+            return
+        else:
+            pos = pos + len(line)
+
+def format_query(x):
+    '''
+    make list for query in format ("x","y","x",...)
+    '''
+    out = '("'+ '","'.join(
+        map(str, x)
+    ) + '")'
+    return out