Mercurial > repos > dfornika > match_plasmid_to_reference
comparison match_plasmid_to_reference.py @ 0:8bb674372911 draft
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
author | dfornika |
---|---|
date | Wed, 06 Nov 2019 00:08:43 -0500 |
parents | |
children | 3616b6eda1da |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8bb674372911 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from __future__ import print_function | |
4 | |
5 import argparse | |
6 import csv | |
7 import errno | |
8 import json | |
9 import os | |
10 import re | |
11 import shutil | |
12 import sys | |
13 | |
14 from pprint import pprint | |
15 | |
16 MOB_TYPER_FIELDNAMES = [ | |
17 "file_id", | |
18 "num_contigs", | |
19 "total_length", | |
20 "gc", | |
21 "rep_type(s)", | |
22 "rep_type_accession(s)", | |
23 "relaxase_type(s)", | |
24 "relaxase_type_accession(s)", | |
25 "mpf_type", | |
26 "mpf_type_accession(s)", | |
27 "orit_type(s)", | |
28 "orit_accession(s)", | |
29 "PredictedMobility", | |
30 "mash_nearest_neighbor", | |
31 "mash_neighbor_distance", | |
32 "mash_neighbor_cluster", | |
33 "NCBI-HR-rank", | |
34 "NCBI-HR-Name", | |
35 "LitRepHRPlasmClass", | |
36 "LitPredDBHRRank", | |
37 "LitPredDBHRRankSciName", | |
38 "LitRepHRRankInPubs", | |
39 "LitRepHRNameInPubs", | |
40 "LitMeanTransferRate", | |
41 "LitClosestRefAcc", | |
42 "LitClosestRefDonorStrain", | |
43 "LitClosestRefRecipientStrain", | |
44 "LitClosestRefTransferRate", | |
45 "LitClosestConjugTemp", | |
46 "LitPMIDs", | |
47 "LitPMIDsNumber", | |
48 ] | |
49 | |
50 def parse_mob_typer_report(mob_typer_report_path): | |
51 mob_typer_report = [] | |
52 | |
53 with open(mob_typer_report_path) as f: | |
54 reader = csv.DictReader(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES) | |
55 for row in reader: | |
56 mob_typer_report.append(row) | |
57 return mob_typer_report | |
58 | |
59 def parse_genbank_accession(genbank_file_path): | |
60 with open(genbank_file_path, 'r') as f: | |
61 while True: | |
62 line = f.readline() | |
63 # break while statement if it is not a comment line | |
64 # i.e. does not startwith # | |
65 if line.startswith('ACCESSION'): | |
66 return line.strip().split()[1] | |
67 | |
68 | |
69 | |
70 def count_contigs(plasmid_fasta_path): | |
71 contigs = 0 | |
72 with open(plasmid_fasta_path, 'r') as f: | |
73 contigs = 2 | |
74 return contigs | |
75 | |
76 def count_bases(plasmid_fasta_path): | |
77 bases = 0 | |
78 with open(plasmid_fasta_path, 'r') as f: | |
79 bases = 11117 | |
80 return bases | |
81 | |
82 def main(args): | |
83 # create output directory | |
84 try: | |
85 os.mkdir(args.outdir) | |
86 except OSError as exc: | |
87 if exc.errno == errno.EEXIST and os.path.isdir(args.outdir): | |
88 pass | |
89 else: | |
90 raise | |
91 | |
92 # parse mob_typer report | |
93 mob_typer_report = parse_mob_typer_report(args.mob_typer_report) | |
94 num_plasmid_contigs = count_contigs(args.plasmid) | |
95 num_plasmid_bases = count_bases(args.plasmid) | |
96 | |
97 with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f: | |
98 mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES) | |
99 mob_typer_record_writer.writeheader() | |
100 for record in mob_typer_report: | |
101 if num_plasmid_contigs == int(record['num_contigs']) and num_plasmid_bases == int(record['total_length']): | |
102 for reference_plasmid in args.reference_plasmids: | |
103 if parse_genbank_accession(reference_plasmid) == record['mash_nearest_neighbor']: | |
104 shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk")) | |
105 mob_typer_record_writer.writerow(record) | |
106 | |
107 shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta")) | |
108 | |
109 | |
110 if __name__ == '__main__': | |
111 parser = argparse.ArgumentParser() | |
112 parser.add_argument("--plasmid", help="plasmid assembly (fasta)") | |
113 parser.add_argument("--reference_plasmids", nargs='+', help="reference plasmids (genbank)") | |
114 parser.add_argument("--mob_typer_report", help="mob_typer reports (tsv)") | |
115 parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory") | |
116 args = parser.parse_args() | |
117 main(args) |