Mercurial > repos > iuc > vcontact2
diff extract_p2c_mapping.py @ 0:51aaa210d1ee draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vcontact2 commit 7bf2bea944495d304eeb2df687b9e1a046fb8026
| author | iuc |
|---|---|
| date | Wed, 04 Feb 2026 14:31:41 +0000 |
| parents | |
| children | |
line wrap: on
line diff
import re
import sys


def main(in_file, bins_file, out_file, pattern):
    """Write a protein-to-contig CSV mapping from FASTA header lines.

    Every line of ``in_file`` that starts with ``>`` is matched against
    ``pattern``; capture group 1 is written as ``protein_id`` and capture
    group 2 as ``contig_id``. If ``bins_file`` is given, a contig that
    appears in it is replaced by ``bin_<bin number>`` in the output.

    Args:
        in_file: Path to the FASTA file whose header lines are parsed.
        bins_file: Path to a tab-separated file with a header row followed
            by ``<contig name>\\t<bin number>`` rows, or the literal string
            ``'None'`` to disable bin substitution.
        out_file: Path of the CSV file to write
            (columns ``protein_id,contig_id,keywords``).
        pattern: Regular expression with at least two capture groups,
            applied with ``match`` semantics to each header line.

    Raises:
        ValueError: If a non-blank bins row does not consist of exactly
            two tab-separated fields.
        re.error: If ``pattern`` is not a valid regular expression.
    """
    members = {}
    if bins_file != 'None':
        with open(bins_file) as bins:
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(bins, None)
            for row in bins:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not row.strip():
                    continue
                name, bin_nr = row.split('\t')
                members[name.strip()] = "bin_" + bin_nr.strip()

    with open(in_file, 'r') as fasta, open(out_file, 'w') as table:
        print(f"using pattern '{pattern}'")
        table.write("protein_id,contig_id,keywords\n")
        # Compile once instead of looking the pattern up for every header.
        header_re = re.compile(pattern)
        # Patterns: prodigal: /^>(.*?)_([0-9]*) #/ phanotate: /^>(.*?)_CDS_([0-9]*) /
        for line in fasta:
            if not line.startswith(">"):
                continue
            match = header_re.match(line)
            if match is None:
                # Report and skip: dereferencing the failed match would
                # otherwise abort with AttributeError.
                print("failed to match", line)
                continue
            protein = match.group(1)
            contig = match.group(2)
            # Substitute the bin label when the contig belongs to a bin.
            contig = members.get(contig, contig)
            table.write(f"{protein},{contig},None\n")


if __name__ == "__main__":
    main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
