Mercurial > repos > iuc > vcontact2
comparison extract_p2c_mapping.py @ 0:51aaa210d1ee draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vcontact2 commit 7bf2bea944495d304eeb2df687b9e1a046fb8026
| author | iuc |
|---|---|
| date | Wed, 04 Feb 2026 14:31:41 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:51aaa210d1ee |
|---|---|
| 1 import re | |
| 2 import sys | |
| 3 | |
| 4 | |
def main(in_file, bins_file, out_file, pattern):
    """Write a protein-to-contig mapping CSV from FASTA header lines.

    Every line of ``in_file`` that starts with ``>`` is matched against
    ``pattern``. Capture group 1 is written as ``protein_id`` and capture
    group 2 as ``contig_id``; the ``keywords`` column is always the literal
    ``None``. If ``bins_file`` is given, a contig listed there is replaced
    by ``bin_<number>`` in the output.

    Args:
        in_file: Path of the FASTA file whose header lines are parsed.
        bins_file: Path of a tab-separated, two-column table
            (contig name, bin number) with one header line, or the literal
            string ``'None'`` to disable bin substitution.
        out_file: Path of the CSV file to create (overwritten if present).
        pattern: Regular expression with at least two capture groups,
            applied with ``re.match`` to each header line.

    Raises:
        re.error: If ``pattern`` is not a valid regular expression.
        ValueError: If a non-blank row of ``bins_file`` does not consist of
            exactly two tab-separated fields.
    """
    members = {}
    if bins_file != 'None':
        with open(bins_file) as bins:
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(bins, None)
            for m in bins:
                if not m.strip():
                    # Tolerate blank (e.g. trailing) lines in the table.
                    continue
                name, binNr = m.split('\t')
                members[name.strip()] = "bin_" + binNr.strip()

    with open(in_file, 'r') as f, open(out_file, 'w') as g:
        print(f"using pattern '{pattern}'")
        g.write("protein_id,contig_id,keywords\n")
        # Patterns: prodigal: /^>(.*?)_([0-9]*) #/ phanotate: /^>(.*?)_CDS_([0-9]*) /
        # NOTE(review): with the example patterns above, group 1 looks like
        # the contig prefix and group 2 the gene index, which does not fit
        # the protein/contig usage below -- confirm against the patterns the
        # caller actually passes.
        header_re = re.compile(pattern)  # compile once, not per header line
        for line in f:
            if not line.startswith(">"):
                continue
            match = header_re.match(line)
            if not match:
                # Report and skip; calling .group() on None would abort the
                # whole run with an AttributeError.
                print("failed to match", line)
                continue
            protein = match.group(1)
            contig = match.group(2)
            # Contigs assigned to a bin are reported under the bin label.
            contig = members.get(contig, contig)
            g.write(f"{protein},{contig},None\n")
| 29 | |
| 30 | |
if __name__ == "__main__":
    # Command line: <in_file> <bins_file|None> <out_file> <pattern>
    # Exit with a usage message instead of a bare IndexError traceback when
    # arguments are missing; extra arguments are ignored, as before.
    if len(sys.argv) < 5:
        sys.exit(
            f"usage: {sys.argv[0]} <in_file> <bins_file|None> <out_file> <pattern>"
        )
    main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
