annotate match_plasmid_to_reference.py @ 0:8bb674372911 draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
author dfornika
date Wed, 06 Nov 2019 00:08:43 -0500
parents
children 3616b6eda1da
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
2
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
3 from __future__ import print_function
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
4
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
5 import argparse
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
6 import csv
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
7 import errno
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
8 import json
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
9 import os
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
10 import re
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
11 import shutil
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
12 import sys
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
13
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
14 from pprint import pprint
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
15
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
# Column names of a mob_typer report, in file order. The list is used to
# label columns positionally when the report is read, and as the header /
# column order of the record that is written out.
MOB_TYPER_FIELDNAMES = [
    # per-file summary columns
    "file_id",
    "num_contigs",
    "total_length",
    "gc",
    # rep / relaxase / mpf / orit typing columns
    "rep_type(s)",
    "rep_type_accession(s)",
    "relaxase_type(s)",
    "relaxase_type_accession(s)",
    "mpf_type",
    "mpf_type_accession(s)",
    "orit_type(s)",
    "orit_accession(s)",
    "PredictedMobility",
    # mash_* columns
    "mash_nearest_neighbor",
    "mash_neighbor_distance",
    "mash_neighbor_cluster",
    # NCBI-HR-* columns
    "NCBI-HR-rank",
    "NCBI-HR-Name",
    # Lit* columns
    "LitRepHRPlasmClass",
    "LitPredDBHRRank",
    "LitPredDBHRRankSciName",
    "LitRepHRRankInPubs",
    "LitRepHRNameInPubs",
    "LitMeanTransferRate",
    "LitClosestRefAcc",
    "LitClosestRefDonorStrain",
    "LitClosestRefRecipientStrain",
    "LitClosestRefTransferRate",
    "LitClosestConjugTemp",
    "LitPMIDs",
    "LitPMIDsNumber",
]
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
49
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def parse_mob_typer_report(mob_typer_report_path):
    """Read a tab-separated mob_typer report into a list of row dicts.

    Columns are labelled positionally with MOB_TYPER_FIELDNAMES, so every
    line of the file is returned as a data row (a header line, if the file
    has one, is not treated specially).

    :param mob_typer_report_path: path to the tab-delimited report
    :return: list with one dict per line, keyed by MOB_TYPER_FIELDNAMES
    """
    with open(mob_typer_report_path) as report_file:
        rows = csv.DictReader(
            report_file,
            fieldnames=MOB_TYPER_FIELDNAMES,
            delimiter="\t",
            quotechar='"',
        )
        # Materialise inside the `with` block: the reader is lazy and needs
        # the file to still be open.
        return list(rows)
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
58
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def parse_genbank_accession(genbank_file_path):
    """Return the accession from the first ACCESSION line of a GenBank file.

    The file is scanned line by line; on the first line that starts with
    ``ACCESSION`` the first whitespace-separated token after the keyword is
    returned.

    :param genbank_file_path: path to a GenBank flat file
    :return: the accession string, or None when the file has no ACCESSION
        line or that line carries no value
    """
    with open(genbank_file_path, 'r') as f:
        # Iterate the file object so the scan ends at EOF. A bare
        # `while True: f.readline()` loop never terminates on a file
        # without an ACCESSION line, because readline() keeps returning ''.
        for line in f:
            if line.startswith('ACCESSION'):
                fields = line.split()
                # Guard against an ACCESSION keyword with nothing after it.
                return fields[1] if len(fields) > 1 else None
    return None
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
67
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
68
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
69
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def count_contigs(plasmid_fasta_path):
    """Count the sequences (contigs) in a FASTA file.

    Each line beginning with ``>`` starts a new FASTA record, so the number
    of such header lines is the number of contigs. The count is derived
    from the file contents rather than a fixed placeholder value.

    :param plasmid_fasta_path: path to the FASTA file
    :return: number of header lines found (0 for an empty file)
    """
    with open(plasmid_fasta_path, 'r') as f:
        return sum(1 for line in f if line.startswith('>'))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
75
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def count_bases(plasmid_fasta_path):
    """Count the total number of sequence characters in a FASTA file.

    Header lines (starting with ``>``) are skipped; for every other line the
    length after stripping surrounding whitespace (including the line
    ending) is added to the total. The count is derived from the file
    contents rather than a fixed placeholder value.

    :param plasmid_fasta_path: path to the FASTA file
    :return: summed length of all sequence lines (0 for an empty file)
    """
    bases = 0
    with open(plasmid_fasta_path, 'r') as f:
        for line in f:
            if line.startswith('>'):
                continue
            # strip() drops the newline (and any '\r') so only sequence
            # characters are counted; blank lines contribute 0.
            bases += len(line.strip())
    return bases
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
81
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def main(args):
    """Write the mob_typer record and reference plasmid matching a plasmid.

    Steps:
      1. Create ``args.outdir`` (an existing directory is accepted).
      2. Parse the mob_typer report and measure the plasmid FASTA
         (contig count and total length).
      3. Write ``mob_typer_record.tsv`` in the output directory with a
         header row, followed by each report row whose ``num_contigs`` and
         ``total_length`` equal the measured values and whose
         ``mash_nearest_neighbor`` equals the accession of one of the
         reference GenBank files. That reference file is copied to
         ``reference_plasmid.gbk`` in the output directory.
      4. Copy the plasmid FASTA to ``plasmid.fasta`` in the output directory.

    :param args: namespace with ``outdir``, ``mob_typer_report``,
        ``plasmid`` and ``reference_plasmids`` attributes
    :raises OSError: if the output directory cannot be created
    """
    # create output directory
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        # Only "already exists, and is a directory" is acceptable.
        if exc.errno != errno.EEXIST or not os.path.isdir(args.outdir):
            raise

    # parse mob_typer report
    mob_typer_report = parse_mob_typer_report(args.mob_typer_report)
    num_plasmid_contigs = count_contigs(args.plasmid)
    num_plasmid_bases = count_bases(args.plasmid)

    # Tolerate the option being absent (argparse leaves it as None).
    reference_plasmids = args.reference_plasmids or []
    # Accession per reference file, filled on first use so that each GenBank
    # file is parsed at most once, however many records match.
    reference_accessions = {}

    record_path = os.path.join(args.outdir, 'mob_typer_record.tsv')
    reference_copy_path = os.path.join(args.outdir, "reference_plasmid.gbk")

    with open(record_path, 'w') as f:
        mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
        mob_typer_record_writer.writeheader()
        for record in mob_typer_report:
            try:
                record_contigs = int(record['num_contigs'])
                record_bases = int(record['total_length'])
            except (TypeError, ValueError):
                # Non-numeric or missing values (e.g. a header line read as
                # data, or a short row) cannot match the plasmid; skip the
                # row instead of aborting the whole run.
                continue
            if record_contigs != num_plasmid_contigs or record_bases != num_plasmid_bases:
                continue
            for reference_plasmid in reference_plasmids:
                if reference_plasmid not in reference_accessions:
                    reference_accessions[reference_plasmid] = parse_genbank_accession(reference_plasmid)
                if reference_accessions[reference_plasmid] == record['mash_nearest_neighbor']:
                    shutil.copy2(reference_plasmid, reference_copy_path)
                    mob_typer_record_writer.writerow(record)

    shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta"))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
108
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
109
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    # --plasmid and --mob_typer_report are always opened by main(), so a
    # missing value is reported by argparse rather than surfacing later as
    # a TypeError from open(None).
    parser.add_argument("--plasmid", required=True, help="plasmid assembly (fasta)")
    # Default to an empty list so the option stays optional but is always
    # iterable.
    parser.add_argument("--reference_plasmids", nargs='+', default=[], help="reference plasmids (genbank)")
    parser.add_argument("--mob_typer_report", required=True, help="mob_typer reports (tsv)")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)