Mercurial > repos > dfornika > match_plasmid_to_reference
annotate match_plasmid_to_reference.py @ 1:3616b6eda1da draft
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
author | dfornika |
---|---|
date | Wed, 06 Nov 2019 00:42:05 -0500 |
parents | 8bb674372911 |
children | 826ddf832bef |
rev | line source |
---|---|
0
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
1 #!/usr/bin/env python |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
2 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
3 from __future__ import print_function |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
4 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
5 import argparse |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
6 import csv |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
7 import errno |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
8 import json |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
9 import os |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
10 import re |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
11 import shutil |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
12 import sys |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
13 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
14 from pprint import pprint |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
15 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
# Column names of a mob_typer report, in file order. The same list is used
# both to read the report (columns are assigned positionally) and to write
# the header of the extracted record.
MOB_TYPER_FIELDNAMES = [
    # assembly summary and typing columns
    "file_id",
    "num_contigs",
    "total_length",
    "gc",
    "rep_type(s)",
    "rep_type_accession(s)",
    "relaxase_type(s)",
    "relaxase_type_accession(s)",
    "mpf_type",
    "mpf_type_accession(s)",
    "orit_type(s)",
    "orit_accession(s)",
    "PredictedMobility",
    # mash_* columns
    "mash_nearest_neighbor",
    "mash_neighbor_distance",
    "mash_neighbor_cluster",
    # NCBI-* columns
    "NCBI-HR-rank",
    "NCBI-HR-Name",
    # Lit* columns
    "LitRepHRPlasmClass",
    "LitPredDBHRRank",
    "LitPredDBHRRankSciName",
    "LitRepHRRankInPubs",
    "LitRepHRNameInPubs",
    "LitMeanTransferRate",
    "LitClosestRefAcc",
    "LitClosestRefDonorStrain",
    "LitClosestRefRecipientStrain",
    "LitClosestRefTransferRate",
    "LitClosestConjugTemp",
    "LitPMIDs",
    "LitPMIDsNumber",
]
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
49 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
def parse_mob_typer_report(mob_typer_report_path):
    """Read a tab-separated mob_typer report into a list of row dicts.

    Columns are assigned positionally from MOB_TYPER_FIELDNAMES. Because
    the field names are passed explicitly, the csv module treats every line
    of the file as data, including a header line if one is present. A
    leading row that merely repeats the column names is therefore dropped,
    so callers only ever see data rows.

    :param mob_typer_report_path: path to the mob_typer report (tsv)
    :return: list of dicts keyed by MOB_TYPER_FIELDNAMES, one per data row
    """
    with open(mob_typer_report_path) as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
        mob_typer_report = list(reader)

    # A header row would otherwise reach int(record['num_contigs']) in the
    # caller and raise ValueError.
    if mob_typer_report:
        first_row = mob_typer_report[0]
        if first_row.get('file_id') == 'file_id' and first_row.get('num_contigs') == 'num_contigs':
            del mob_typer_report[0]

    return mob_typer_report
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
58 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
def parse_genbank_accession(genbank_file_path):
    """Return the accession found on the ACCESSION line of a GenBank file.

    The file is scanned line by line for the first line that starts with
    ``ACCESSION`` and carries a value; the first whitespace-separated token
    after the keyword is returned.

    :param genbank_file_path: path to a GenBank flat file
    :return: the accession string, or None when the file contains no
        ACCESSION line with a value (including an empty file)
    """
    with open(genbank_file_path, 'r') as f:
        # Iterating the file stops at EOF; the previous readline() loop
        # kept receiving '' forever when no ACCESSION line was present.
        for line in f:
            if line.startswith('ACCESSION'):
                fields = line.split()
                if len(fields) > 1:
                    return fields[1]
    return None
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
67 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
68 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
def count_contigs(plasmid_fasta_path):
    """Count the FASTA header lines in a file.

    A header line is any line whose first character is ``>``; lines are not
    stripped before the check.

    :param plasmid_fasta_path: path to a FASTA file
    :return: number of lines beginning with ``>``
    """
    with open(plasmid_fasta_path, 'r') as fasta:
        return sum(1 for fasta_line in fasta if fasta_line.startswith('>'))
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
76 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
def count_bases(plasmid_fasta_path):
    """Sum the lengths of all non-header lines in a FASTA file.

    Each line is stripped of surrounding whitespace first; stripped lines
    that start with ``>`` are treated as headers and skipped.

    :param plasmid_fasta_path: path to a FASTA file
    :return: total number of characters on the sequence lines
    """
    with open(plasmid_fasta_path, 'r') as fasta:
        stripped_lines = (raw_line.strip() for raw_line in fasta)
        return sum(len(seq) for seq in stripped_lines if not seq.startswith('>'))
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
85 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
def main(args):
    """Collect a plasmid, its mob_typer record and its reference in one directory.

    Writes into ``args.outdir``:

    * ``mob_typer_record.tsv`` -- a header plus every report row whose
      ``num_contigs`` and ``total_length`` equal the contig and base counts
      of ``args.plasmid``;
    * ``reference_plasmid.gbk`` -- a copy of each reference whose accession
      equals the ``mash_nearest_neighbor`` of such a row (a later match
      overwrites an earlier one);
    * ``plasmid.fasta`` -- a copy of ``args.plasmid``.

    :param args: namespace with ``outdir``, ``mob_typer_report``,
        ``plasmid`` and ``reference_plasmids`` attributes
    :raises OSError: if the output directory cannot be created
    :raises ValueError: if a report row has a non-integer ``num_contigs``
        or ``total_length``
    """
    # create output directory; an already-existing directory is not an error
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(args.outdir):
            raise

    # parse mob_typer report
    mob_typer_report = parse_mob_typer_report(args.mob_typer_report)
    num_plasmid_contigs = count_contigs(args.plasmid)
    num_plasmid_bases = count_bases(args.plasmid)

    # Each reference file is parsed at most once, and only when a record
    # actually needs it, instead of once per (record, reference) pair.
    accession_by_path = {}

    def reference_accession(path):
        if path not in accession_by_path:
            accession_by_path[path] = parse_genbank_accession(path)
        return accession_by_path[path]

    with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f:
        mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
        mob_typer_record_writer.writeheader()
        for record in mob_typer_report:
            if num_plasmid_contigs != int(record['num_contigs']) or num_plasmid_bases != int(record['total_length']):
                continue
            for reference_plasmid in args.reference_plasmids:
                if reference_accession(reference_plasmid) == record['mash_nearest_neighbor']:
                    shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk"))
            # NOTE(review): the available copy of this file had lost its
            # indentation; the record is assumed to be written for every
            # size-matching row, whether or not a reference matched --
            # confirm against the repository.
            mob_typer_record_writer.writerow(record)

    shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta"))
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
112 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
113 |
8bb674372911
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
dfornika
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser()
    # main() opens both of these files unconditionally, so a missing option
    # is reported as a usage error rather than a TypeError from open(None).
    parser.add_argument("--plasmid", required=True, help="plasmid assembly (fasta)")
    # Defaults to an empty list so that main() can always iterate over it.
    parser.add_argument("--reference_plasmids", nargs='+', default=[], help="reference plasmids (genbank)")
    parser.add_argument("--mob_typer_report", required=True, help="mob_typer reports (tsv)")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)