annotate match_plasmid_to_reference.py @ 4:826ddf832bef draft default tip

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
author dfornika
date Wed, 06 Nov 2019 13:52:40 -0500
parents 3616b6eda1da
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
1 #!/usr/bin/env python
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
2
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
3 from __future__ import print_function, division
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
4
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
5 import argparse
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
6 import csv
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
7 import errno
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
8 import json
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
9 import os
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
10 import re
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
11 import shutil
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
12 import sys
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
13
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
14 from pprint import pprint
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
15
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
# Column names, in file order, for the tab-separated MOB-typer report.
# The same list is used as the `fieldnames` for reading the report and for
# writing the output record, so the order here is the column order on disk.
MOB_TYPER_FIELDNAMES = [
    # basic assembly statistics
    "file_id",
    "num_contigs",
    "total_length",
    "gc",
    # replicon / relaxase / MPF / oriT typing columns
    "rep_type(s)",
    "rep_type_accession(s)",
    "relaxase_type(s)",
    "relaxase_type_accession(s)",
    "mpf_type",
    "mpf_type_accession(s)",
    "orit_type(s)",
    "orit_accession(s)",
    "PredictedMobility",
    # nearest reference according to mash
    "mash_nearest_neighbor",
    "mash_neighbor_distance",
    "mash_neighbor_cluster",
    # NCBI-HR-* and Lit* columns
    "NCBI-HR-rank",
    "NCBI-HR-Name",
    "LitRepHRPlasmClass",
    "LitPredDBHRRank",
    "LitPredDBHRRankSciName",
    "LitRepHRRankInPubs",
    "LitRepHRNameInPubs",
    "LitMeanTransferRate",
    "LitClosestRefAcc",
    "LitClosestRefDonorStrain",
    "LitClosestRefRecipientStrain",
    "LitClosestRefTransferRate",
    "LitClosestConjugTemp",
    "LitPMIDs",
    "LitPMIDsNumber",
]
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
49
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
def parse_mob_typer_report(mob_typer_report_path):
    """Load a tab-separated MOB-typer report into a list of row mappings.

    Column names come from MOB_TYPER_FIELDNAMES rather than from the file,
    so every line in the file (including a header line, if one is present)
    is returned as a record.

    :param mob_typer_report_path: path of the report file to read
    :return: list with one mapping of column name -> string value per line
    """
    with open(mob_typer_report_path) as report_file:
        rows = csv.DictReader(
            report_file,
            fieldnames=MOB_TYPER_FIELDNAMES,
            delimiter="\t",
            quotechar='"',
        )
        # Materialise the rows before the file is closed.
        return list(rows)
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
58
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
def parse_genbank_accession(genbank_path):
    """Return the accession from the first ACCESSION line of a GenBank file.

    The accession is the second whitespace-separated token on the first line
    that starts with ``ACCESSION``.

    :param genbank_path: path of the GenBank file to read
    :return: the accession string, or None when the file has no ACCESSION
        line or that line carries no value
    """
    with open(genbank_path, 'r') as f:
        # Iterate the file instead of looping on readline(): readline()
        # returns '' forever at end-of-file, so a file without an ACCESSION
        # line used to hang here.
        for line in f:
            if line.startswith('ACCESSION'):
                fields = line.split()
                # A bare "ACCESSION" line has no second token to return.
                return fields[1] if len(fields) > 1 else None
    return None
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
65
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
def parse_fasta_accession(fasta_path):
    """Return the identifier from the first header line of a FASTA file.

    The identifier is the first whitespace-separated token of the first line
    starting with ``>``, with the leading ``>`` removed.

    :param fasta_path: path of the FASTA file to read
    :return: the identifier string; '' when the first header has no
        identifier directly after ``>`` or when the file has no header line
    """
    with open(fasta_path, 'r') as f:
        # Iterate the file instead of looping on readline(): readline()
        # returns '' forever at end-of-file, so a file without a header
        # line used to hang here.
        for line in f:
            if line.startswith('>'):
                return line.strip().split()[0][1:]
    # No header found: return the same empty identifier a bare ">" header
    # yields, so the result is always a string.
    return ''
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
72
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
def count_fasta_contigs(fasta_path):
    """Count the records in a FASTA file.

    A record is any line whose first character is ``>``.

    :param fasta_path: path of the FASTA file to read
    :return: number of header lines found (0 for a file with none)
    """
    with open(fasta_path, 'r') as fasta_file:
        return sum(1 for entry in fasta_file if entry.startswith('>'))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
80
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
def count_fasta_bases(fasta_path):
    """Total the sequence characters in a FASTA file.

    Each line is stripped of surrounding whitespace first; stripped lines
    that start with ``>`` are treated as headers and skipped, and the
    lengths of all remaining lines are summed.

    :param fasta_path: path of the FASTA file to read
    :return: combined length of all non-header lines
    """
    with open(fasta_path, 'r') as fasta_file:
        stripped_lines = (raw.strip() for raw in fasta_file)
        return sum(
            len(sequence)
            for sequence in stripped_lines
            if not sequence.startswith('>')
        )
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
89
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
def compute_fasta_gc_percent(fasta_path):
    """Compute the G+C content of a FASTA file as a percentage.

    Header lines (starting with ``>`` once surrounding whitespace is
    stripped) are ignored. G and C are counted case-insensitively and
    divided by the total number of sequence characters.

    :param fasta_path: path of the FASTA file to read
    :return: G+C percentage as a float in the range 0-100; 0.0 when the
        file contains no sequence characters
    """
    gc_count = 0
    total_bases_count = 0
    with open(fasta_path, 'r') as f:
        for line in f:
            # Strip before the header check so that header detection agrees
            # with count_fasta_bases.
            line = line.strip()
            if line.startswith('>'):
                continue
            gc_count += sum(line.count(base) for base in 'cCgG')
            total_bases_count += len(line)

    # An empty or header-only file has nothing to divide by.
    if total_bases_count == 0:
        return 0.0
    # float() keeps this a true division even without the file-level
    # `from __future__ import division`.
    return 100 * (float(gc_count) / total_bases_count)
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
103
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
104 def main(args):
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
105
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
106 # create output directory
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
107 try:
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
108 os.mkdir(args.outdir)
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
109 except OSError as exc:
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
110 if exc.errno == errno.EEXIST and os.path.isdir(args.outdir):
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
111 pass
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
112 else:
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
113 raise
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
114
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
115 # parse mob_typer report
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
116 mob_typer_report = parse_mob_typer_report(args.mob_typer_report)
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
117 num_plasmid_contigs = count_fasta_contigs(args.plasmid)
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
118 num_plasmid_bases = count_fasta_bases(args.plasmid)
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
119 plasmid_gc_percent = compute_fasta_gc_percent(args.plasmid)
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
120
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
121 with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f:
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
122 mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
123 mob_typer_record_writer.writeheader()
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
124 for record in mob_typer_report:
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
125 # match the plasmid against three properties in the MOB-Typer report:
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
126 # 1. number of contigs
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
127 # 2. total length of all contigs
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
128 # 3. G/C percent (within +/-0.1%)
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
129 if num_plasmid_contigs == int(record['num_contigs']) and \
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
130 num_plasmid_bases == int(record['total_length']) and \
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
131 abs(plasmid_gc_percent - float(record['gc'])) < 0.1:
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
132 for reference_plasmid in args.reference_plasmids_genbank:
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
133 if parse_genbank_accession(reference_plasmid) == record['mash_nearest_neighbor']:
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
134 shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk"))
4
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
135
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
136 for reference_plasmid in args.reference_plasmids_fasta:
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
137 if re.match(record['mash_nearest_neighbor'], parse_fasta_accession(reference_plasmid)) is not None:
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
138 shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.fasta"))
826ddf832bef "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents: 1
diff changeset
139 mob_typer_record_writer.writerow(record)
0
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
140
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
141 shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta"))
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
142
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
143
8bb674372911 "planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    # main() copies args.plasmid into the output directory unconditionally,
    # so let argparse reject a missing value up front with a usage message
    # instead of failing later on a None path.
    parser.add_argument("--plasmid", required=True, help="plasmid assembly (fasta)")
    # main() iterates over both reference lists; default to an empty list so
    # that omitting either option means "no references of this kind" rather
    # than a TypeError from iterating over None.
    parser.add_argument("--reference_plasmids_genbank", nargs='+', default=[], help="reference plasmids (genbank)")
    parser.add_argument("--reference_plasmids_fasta", nargs='+', default=[], help="reference plasmids (fasta)")
    parser.add_argument("--mob_typer_report", help="mob_typer reports (tsv)")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)