comparison extract_p2c_mapping.py @ 0:51aaa210d1ee draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vcontact2 commit 7bf2bea944495d304eeb2df687b9e1a046fb8026
author iuc
date Wed, 04 Feb 2026 14:31:41 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:51aaa210d1ee
1 import re
2 import sys
3
4
def main(in_file, bins_file, out_file, pattern):
    """Write a protein-to-contig CSV mapping built from '>' header lines.

    Every line of ``in_file`` that starts with ``>`` is matched against
    ``pattern``. Capture group 1 is written as the protein id and capture
    group 2 as the contig id. If the contig id is listed in ``bins_file``,
    it is replaced by ``bin_<number>``. The third CSV column is always the
    literal ``None``.

    Args:
        in_file: Path of the input file containing ``>`` header lines.
        bins_file: Path of a tab-separated file with one header row followed
            by ``name<TAB>bin number`` rows, or the literal string ``'None'``
            when no bin table is supplied.
        out_file: Path of the CSV file to create (overwritten if present).
        pattern: Regular expression with at least two capture groups,
            applied with ``match`` semantics to each header line.

    Raises:
        ValueError: If a non-blank row of ``bins_file`` does not consist of
            exactly two tab-separated fields.
        re.error: If ``pattern`` is not a valid regular expression.
    """
    members = {}
    if bins_file != 'None':
        with open(bins_file) as bins:
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(bins, None)
            for m in bins:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not m.strip():
                    continue
                name, binNr = m.split('\t')
                members[name.strip()] = "bin_" + binNr.strip()

    with open(in_file, 'r') as f, open(out_file, 'w') as g:
        print(f"using pattern '{pattern}'")
        # Compile once instead of looking the pattern up for every header.
        header_re = re.compile(pattern)
        g.write("protein_id,contig_id,keywords\n")
        # Patterns: prodigal: /^>(.*?)_([0-9]*) #/ phanotate: /^>(.*?)_CDS_([0-9]*) /
        # NOTE(review): the examples above capture the contig name in group 1
        # and a number in group 2, whereas group 1 is written as protein_id
        # below - confirm against the patterns the caller actually passes.
        for line in f:
            if not line.startswith(">"):
                continue
            match = header_re.match(line)
            if not match:
                # Report and skip: dereferencing the failed match would
                # otherwise abort with an AttributeError.
                print("failed to match", line)
                continue
            protein = match.group(1)
            contig = match.group(2)
            # Contigs assigned to a bin are reported under the bin label.
            contig = members.get(contig, contig)
            g.write(f"{protein},{contig},None\n")
29
30
if __name__ == "__main__":
    # Positional command-line arguments, in order: input file, bins table
    # (or the literal 'None'), output CSV, header regular expression.
    in_file = sys.argv[1]
    bins_file = sys.argv[2]
    out_file = sys.argv[3]
    pattern = sys.argv[4]
    main(in_file, bins_file, out_file, pattern)