annotate match_plasmid_to_reference.py @ 1:3616b6eda1da draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
author dfornika
date Wed, 06 Nov 2019 00:42:05 -0500
parents 8bb674372911
children 826ddf832bef
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
2
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
3 from __future__ import print_function
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
4
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
5 import argparse
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
6 import csv
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
7 import errno
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
8 import json
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
9 import os
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
10 import re
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
11 import shutil
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
12 import sys
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
13
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
14 from pprint import pprint
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
15
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
# Column names, in file order, for one row of a mob_typer report.  The same
# list is handed to csv.DictReader when parsing the report and to
# csv.DictWriter when writing mob_typer_record.tsv, so the order matters.
MOB_TYPER_FIELDNAMES = [
    # assembly summary
    "file_id",
    "num_contigs",
    "total_length",
    "gc",
    # typing columns
    "rep_type(s)",
    "rep_type_accession(s)",
    "relaxase_type(s)",
    "relaxase_type_accession(s)",
    "mpf_type",
    "mpf_type_accession(s)",
    "orit_type(s)",
    "orit_accession(s)",
    "PredictedMobility",
    # mash nearest-neighbour columns
    "mash_nearest_neighbor",
    "mash_neighbor_distance",
    "mash_neighbor_cluster",
    # NCBI-HR-* and Lit* columns
    "NCBI-HR-rank",
    "NCBI-HR-Name",
    "LitRepHRPlasmClass",
    "LitPredDBHRRank",
    "LitPredDBHRRankSciName",
    "LitRepHRRankInPubs",
    "LitRepHRNameInPubs",
    "LitMeanTransferRate",
    "LitClosestRefAcc",
    "LitClosestRefDonorStrain",
    "LitClosestRefRecipientStrain",
    "LitClosestRefTransferRate",
    "LitClosestConjugTemp",
    "LitPMIDs",
    "LitPMIDsNumber",
]
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
49
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def parse_mob_typer_report(mob_typer_report_path):
    """Read a tab-separated mob_typer report into a list of row dicts.

    Each row is keyed by the names in MOB_TYPER_FIELDNAMES, assigned by
    column position.  All values are returned as strings (or None for
    columns missing from a short row).

    Args:
        mob_typer_report_path: path to the tab-separated report file.

    Returns:
        A list with one dict per data row, in file order.  Rows that are
        just the column header are left out.
    """
    header_key = MOB_TYPER_FIELDNAMES[0]

    with open(mob_typer_report_path) as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
        # Field names are supplied explicitly, so DictReader hands back a
        # header line as if it were data.  Such a row maps the first column
        # name onto itself; drop it (wherever it occurs, so concatenated
        # reports with repeated headers are handled too) because callers
        # convert the numeric columns with int().
        return [row for row in reader if row[header_key] != header_key]
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
58
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def parse_genbank_accession(genbank_file_path):
    """Return the accession given on the first ACCESSION line of a file.

    The file is scanned line by line for the first line that starts with
    ``ACCESSION``; the first whitespace-separated token after that keyword
    is returned.

    Args:
        genbank_file_path: path to a GenBank-format text file.

    Returns:
        The accession string, or None when the file has no ACCESSION line
        or that line carries no value.
    """
    with open(genbank_file_path, 'r') as f:
        # Iterating the handle ends at end-of-file, so a file without an
        # ACCESSION line terminates the scan instead of looping forever.
        for line in f:
            if line.startswith('ACCESSION'):
                fields = line.split()
                # A bare "ACCESSION" keyword has no value to return.
                return fields[1] if len(fields) > 1 else None
    return None
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
67
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
68
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def count_contigs(plasmid_fasta_path):
    """Count the header lines (lines starting with '>') in a FASTA file.

    Args:
        plasmid_fasta_path: path to the FASTA file.

    Returns:
        The number of lines whose first character is '>'.
    """
    with open(plasmid_fasta_path, 'r') as fasta:
        return sum(1 for entry in fasta if entry.startswith('>'))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
76
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def count_bases(plasmid_fasta_path):
    """Sum the lengths of all non-header lines in a FASTA file.

    Each line is stripped of surrounding whitespace first; stripped lines
    starting with '>' are treated as headers and ignored.

    Args:
        plasmid_fasta_path: path to the FASTA file.

    Returns:
        The total number of characters on the remaining lines.
    """
    with open(plasmid_fasta_path, 'r') as fasta:
        stripped_lines = (raw.strip() for raw in fasta)
        return sum(len(seq) for seq in stripped_lines if not seq.startswith('>'))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
85
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def _to_int(value):
    """Return ``value`` converted to int, or None if it is not an integer."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def main(args):
    """Select the mob_typer record and reference that belong to a plasmid.

    A report row is taken to describe the plasmid when its ``num_contigs``
    and ``total_length`` equal the counts measured from the plasmid FASTA.
    For such a row, every reference GenBank file whose accession equals the
    row's ``mash_nearest_neighbor`` is copied into the output directory.

    Files written to ``args.outdir``:
        mob_typer_record.tsv   header plus the matched record(s)
        reference_plasmid.gbk  copy of the matching reference, if any
        plasmid.fasta          copy of the input plasmid assembly

    Args:
        args: namespace with the attributes ``outdir``, ``mob_typer_report``,
            ``plasmid`` and ``reference_plasmids`` (a list of paths or None).

    Raises:
        OSError: if the output directory cannot be created, other than
            because it already exists as a directory.
    """
    # create output directory
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if exc.errno != errno.EEXIST or not os.path.isdir(args.outdir):
            raise

    # parse mob_typer report
    mob_typer_report = parse_mob_typer_report(args.mob_typer_report)
    num_plasmid_contigs = count_contigs(args.plasmid)
    num_plasmid_bases = count_bases(args.plasmid)

    # argparse leaves the option as None when it is not given on the
    # command line; treat that as "no references supplied".
    reference_plasmids = args.reference_plasmids or []

    with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f:
        mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
        mob_typer_record_writer.writeheader()
        for record in mob_typer_report:
            # A row whose counts are not integers (a header line, a short
            # row) cannot describe this plasmid, so it is skipped instead
            # of aborting the whole run.
            if _to_int(record['num_contigs']) != num_plasmid_contigs:
                continue
            if _to_int(record['total_length']) != num_plasmid_bases:
                continue
            for reference_plasmid in reference_plasmids:
                if parse_genbank_accession(reference_plasmid) == record['mash_nearest_neighbor']:
                    shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk"))
                    # NOTE(review): the record is written only when its
                    # reference was found and copied -- confirm that this
                    # nesting is the intended behaviour.
                    mob_typer_record_writer.writerow(record)

    shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta"))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
112
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
113
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: collect the input paths and hand them to main().
    parser = argparse.ArgumentParser()
    # main() opens the plasmid and the report unconditionally, so require
    # them here; otherwise a missing option only shows up later as an
    # obscure TypeError from open(None).
    parser.add_argument("--plasmid", required=True, help="plasmid assembly (fasta)")
    # Optional: default to an empty list so main() can always iterate it.
    parser.add_argument("--reference_plasmids", nargs='+', default=[], help="reference plasmids (genbank)")
    parser.add_argument("--mob_typer_report", required=True, help="mob_typer reports (tsv)")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)