diff extract_p2c_mapping.py @ 0:51aaa210d1ee draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vcontact2 commit 7bf2bea944495d304eeb2df687b9e1a046fb8026
author iuc
date Wed, 04 Feb 2026 14:31:41 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_p2c_mapping.py	Wed Feb 04 14:31:41 2026 +0000
@@ -0,0 +1,32 @@
+import re
+import sys
+
+
def main(in_file, bins_file, out_file, pattern):
    """Write a protein-to-contig mapping CSV from ``>`` header lines.

    Every line of ``in_file`` that starts with ``>`` is matched against
    ``pattern`` using ``re.match``. Capture group 1 is written to the
    ``protein_id`` column and capture group 2 to the ``contig_id`` column.
    When a bins file is supplied, a contig id listed in it is replaced by
    ``bin_<bin number>``.

    NOTE(review): with the example patterns quoted in the comment below,
    group 1 is the text before the trailing ``_<number>`` and group 2 is the
    number itself -- confirm that callers pass a pattern whose groups really
    are (protein id, contig id).

    Args:
        in_file: Path of the text file whose ``>`` header lines are parsed.
        bins_file: Path of a tab-separated file with one header row followed
            by ``name<TAB>bin number`` rows, or the literal string ``'None'``
            to disable bin substitution.
        out_file: Path of the CSV file to write. Columns are ``protein_id``,
            ``contig_id`` and ``keywords``; ``keywords`` is always ``None``.
        pattern: Regular expression with at least two capture groups.

    Raises:
        ValueError: If a ``>`` line does not match ``pattern``, or a
            non-blank row of the bins file does not have exactly two
            tab-separated columns.
    """
    members = {}
    if bins_file != 'None':
        with open(bins_file) as bins:
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(bins, None)
            for row in bins:
                if not row.strip():
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                fields = row.split('\t')
                if len(fields) != 2:
                    raise ValueError(
                        "expected 2 tab-separated columns in bins file, "
                        f"got {len(fields)}: {row!r}"
                    )
                name, bin_nr = fields
                members[name.strip()] = "bin_" + bin_nr.strip()

    # Compile once instead of handing the raw pattern to re.match per line.
    header_re = re.compile(pattern)

    with open(in_file, 'r') as f, open(out_file, 'w') as g:
        print(f"using pattern '{pattern}'")
        g.write("protein_id,contig_id,keywords\n")
        # Patterns: prodigal: /^>(.*?)_([0-9]*) #/       phanotate: /^>(.*?)_CDS_([0-9]*) /
        for line in f:
            if not line.startswith(">"):
                continue
            match = header_re.match(line)
            if match is None:
                # Fail with a clear message instead of the AttributeError
                # that calling .group() on None would produce.
                raise ValueError(
                    f"failed to match header line {line.rstrip()!r} "
                    f"with pattern {pattern!r}"
                )
            protein = match.group(1)
            contig = match.group(2)
            # Contigs that belong to a bin are reported under the bin name.
            g.write(f"{protein},{members.get(contig, contig)},None\n")
+
+
if __name__ == "__main__":
    # Positional command-line arguments, in order: input file, bins file
    # (or the literal string 'None'), output CSV, header regex.
    cli_in, cli_bins, cli_out, cli_pattern = (sys.argv[i] for i in range(1, 5))
    main(cli_in, cli_bins, cli_out, cli_pattern)