# HG changeset patch
# User dfornika
# Date 1573066360 18000
# Node ID 826ddf832bef1a1dfa21b1937d8eb10bb90a7a48
# Parent d56b4f7437791008972d5063ecc7e1a164a80cb1
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
diff -r d56b4f743779 -r 826ddf832bef match_plasmid_to_reference.py
--- a/match_plasmid_to_reference.py Wed Nov 06 01:20:36 2019 -0500
+++ b/match_plasmid_to_reference.py Wed Nov 06 13:52:40 2019 -0500
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-from __future__ import print_function
+from __future__ import print_function, division
import argparse
import csv
@@ -56,34 +56,53 @@
mob_typer_report.append(row)
return mob_typer_report
-def parse_genbank_accession(genbank_file_path):
- with open(genbank_file_path, 'r') as f:
+def parse_genbank_accession(genbank_path):
+ with open(genbank_path, 'r') as f:
while True:
line = f.readline()
- # break while statement if it is not a comment line
- # i.e. does not startwith #
if line.startswith('ACCESSION'):
return line.strip().split()[1]
+def parse_fasta_accession(fasta_path):
+ with open(fasta_path, 'r') as f:
+ while True:
+ line = f.readline()
+ if line.startswith('>'):
+ return line.strip().split()[0][1:]
-def count_contigs(plasmid_fasta_path):
+def count_fasta_contigs(fasta_path):
contigs = 0
- with open(plasmid_fasta_path, 'r') as f:
+ with open(fasta_path, 'r') as f:
for line in f:
if line.startswith('>'):
contigs += 1
return contigs
-def count_bases(plasmid_fasta_path):
+def count_fasta_bases(fasta_path):
bases = 0
- with open(plasmid_fasta_path, 'r') as f:
+ with open(fasta_path, 'r') as f:
for line in f:
line = line.strip()
if not line.startswith('>'):
bases += len(line)
return bases
+def compute_fasta_gc_percent(fasta_path):
+ gc_count = 0
+ total_bases_count = 0
+ with open(fasta_path, 'r') as f:
+ for line in f:
+ if not line.startswith('>'):
+ line = line.strip()
+ line_c_count = line.count('c') + line.count('C')
+ line_g_count = line.count('g') + line.count('G')
+ line_total_bases_count = len(line)
+ gc_count += line_c_count + line_g_count
+ total_bases_count += line_total_bases_count
+ return 100 * (gc_count / total_bases_count)
+
def main(args):
+
# create output directory
try:
os.mkdir(args.outdir)
@@ -95,18 +114,29 @@
# parse mob_typer report
mob_typer_report = parse_mob_typer_report(args.mob_typer_report)
- num_plasmid_contigs = count_contigs(args.plasmid)
- num_plasmid_bases = count_bases(args.plasmid)
-
+ num_plasmid_contigs = count_fasta_contigs(args.plasmid)
+ num_plasmid_bases = count_fasta_bases(args.plasmid)
+ plasmid_gc_percent = compute_fasta_gc_percent(args.plasmid)
+
with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f:
mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES)
mob_typer_record_writer.writeheader()
for record in mob_typer_report:
- if num_plasmid_contigs == int(record['num_contigs']) and num_plasmid_bases == int(record['total_length']):
- for reference_plasmid in args.reference_plasmids:
+ # match the plasmid against three properties in the MOB-Typer report:
+ # 1. number of contigs
+ # 2. total length of all contigs
+ # 3. G/C percent (within +/-0.1%)
+ if num_plasmid_contigs == int(record['num_contigs']) and \
+ num_plasmid_bases == int(record['total_length']) and \
+ abs(plasmid_gc_percent - float(record['gc'])) < 0.1:
+ for reference_plasmid in args.reference_plasmids_genbank:
if parse_genbank_accession(reference_plasmid) == record['mash_nearest_neighbor']:
shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk"))
- mob_typer_record_writer.writerow(record)
+
+ for reference_plasmid in args.reference_plasmids_fasta:
+ if re.match(record['mash_nearest_neighbor'], parse_fasta_accession(reference_plasmid)) is not None:
+ shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.fasta"))
+ mob_typer_record_writer.writerow(record)
shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta"))
@@ -114,7 +144,8 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--plasmid", help="plasmid assembly (fasta)")
- parser.add_argument("--reference_plasmids", nargs='+', help="reference plasmids (genbank)")
+ parser.add_argument("--reference_plasmids_genbank", nargs='+', help="reference plasmids (genbank)")
+ parser.add_argument("--reference_plasmids_fasta", nargs='+', help="reference plasmids (fasta)")
parser.add_argument("--mob_typer_report", help="mob_typer reports (tsv)")
parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
args = parser.parse_args()
diff -r d56b4f743779 -r 826ddf832bef match_plasmid_to_reference.xml
--- a/match_plasmid_to_reference.xml Wed Nov 06 01:20:36 2019 -0500
+++ b/match_plasmid_to_reference.xml Wed Nov 06 13:52:40 2019 -0500
@@ -5,8 +5,12 @@
-
+
+
-
+
+
-
+
+
+
+
+
+
+
-
+
+
diff -r d56b4f743779 -r 826ddf832bef test-data/CP008719.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CP008719.fa Wed Nov 06 13:52:40 2019 -0500
@@ -0,0 +1,33 @@
+>CP008719.1 Escherichia coli strain ST648 plasmid pEC648_5, complete sequence
+TAGATTTAAACGGTATCAAGTTTGGATTTTTAAGAACGCATTCTTAGTTCTGGAAAAGAGCCAGCGGCAG
+GCTGAGGTGATAGGTACGAGATTGCATGCAATCTCTAGTGCTCTGTCTATCCTGCATTATCCTCAGCATT
+ATCCTCAGCATTATCCTCAGCCTTGCCAACTCGACACCAATGCAGGATAGACAATCCGATGTCAAATGTT
+AACACTCTGCGAGTGGTACATTTTCCCCGGATTATCGTCCTGAGCCTGCCGCTGGCTCTCTTTCTACCGC
+CTCGCTTTGCTCGTTGCTCAACGCCTCACAGACACGGATTAAAATCCGCATCCGTTCACCGTTTTTTAAA
+GTCCGTTAAAAGCATGATGCCATCTCCGAGAGTTAATCTCGTCAAATGCTAAATCGTGGGGGTCCCCTTT
+GGGGTTCCGATTTAGTGATTGACGACACCACCGATTAAAAAACTTATGCGGGGTGGATGGTTTCACGAAG
+TGAGGCCATCCACCTGTAAGACAGGGTTTTGTTTTTATTCCCTGTTTTGGTGATCGGGTGTGTGGAAAAG
+GTTGGGGTAAGCCGTTCGGGGGTGCTTGTTTTGGGGGGTTAAAATTGTGGTTATTTTTTGCGCAATTCTC
+GCGCGTGATCCTTGTATTTATACTTAAGGGATAAATGGCGGATATGAAATAGTGGTTTAGCCCAGTAATG
+ACGAGGCTTTGAGTGGGTTTTGACAGGTCAAAGAAAATGGAGCAGAATTGAGGCGTTTTTAATCGGCGTT
+GGGGAGTGCGTCAACACTCCCCAACATTTCGAATGTGTCACCTCAGCGGCAAACTCTGGTGACATGTACT
+GGCTCGCAATGCACAGGTACGTGATGAATATACCACATCAAATCACAGCCTGCCCAGATCGGAGCAGGCT
+TAATGTCAGAAGATAAATTCCTTTCGGACTACAGCCCCCGTGATGCAGTTTGGGATACCCAGCGCACGCT
+TACCGATTCTGTCGGGGGTATCTACCAGACTGCTGCTGAATTCGAGCGCTATGCACTCCGTATGGCCTCC
+TGTAGCGGTTTGTTACGTTTTGGTTGGTCTACCATCATGGAAACCGGAGAAACGCGCCTACGGCTTCGTA
+GTGCGCAATTTTGCCGTGTCCGTCATTGCCCTGTCTGCCAGTGGAGAAGAACCCTCATGTGGCAAGCCCG
+TTTTTATCAGGCTCTACCGAAAATCGTTGTGGATTACCCGTCTTCCCGATGGTTGTTTCTGACGTTAACT
+GTCAGGAACTGCGAGATAGGTGAACTTGGAACAGTCCTTACAGCAATGAATGCGGCGTTTAAGCGAATGG
+AAAAGCGAAAGGAGCTATCACCTGTTCAGGGGTGGATCAGGGCTACGGAGGTGACGCGAGGTAAGGATGG
+CAGCGCACATCCGCATTTTCACTGTCTGCTGATGGTGCAACCTTCTTGGTTTAAAGGGAAGAACTACGTT
+AAGCACGAACGTTGGGTAGAACTCTGGCGCGATTGCTTGCGGGTGAACTATGAGCCGAATATCGATATTC
+GGGCAGTAAAAACTAAGACAGGTGAGGTTGTGGCCAACGTTGCCGAGCAACTGCAAAGCGCGGTTGCTGA
+AACGCTGAAATACTCCGTTAAACCGGAAGATATGGCAAACGATCCTGAGTGGTTTCTTGAGCTGACGCGG
+CAGCTTCACAAGCGCCGTTTTATCTCGACCGGTGGGGCGCTAAAAAACGTCCTCCAGTTGGATCGAGAAA
+CCAATGAGGATCTTGTCATTGCCGACGATGTAGGGGATGGCACTGATGACGGGAAGCGGACGGCGTTTGT
+CTGGGATTCAGGTAAACGGCGTTACAAACGCGCCCCTGAGAAGGATAAATCGGATTAACGTATGAATATT
+AATATTGAATACCTGAATGGAAATAAGACTATTGGTTTATTTTTTTTAAGAAGTGAAGCGGTGATTCCTG
+ACAGGTTTAAAAACCTTATTTTGCTTATTGATGGATTAAGTTTTGGCACATTTGGTTTTCATCCGCACGA
+AGGTTTTGAGGATGAATTAATTTTATATATTCAGAAAACAAACGAGAGGGTAAAAACTCTTTTTGTGAAA
+A
+
diff -r d56b4f743779 -r 826ddf832bef test-data/JQ739157.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/JQ739157.fa Wed Nov 06 13:52:40 2019 -0500
@@ -0,0 +1,96 @@
+>JQ739157.2 Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete sequence
+AAGCTTATAGCAGTGTCACAGATGCGAAAAAGCAATTAAGTGCATATTTTGAGTTTTATAATTTGAAACG
+ACCTCATTCGAGTCTAGACAAAATGACACCAAATGAGTTTTACTATGATCAGCTACCCCAACAAAACAAG
+GTGGCTTAACTAGAGCGGAATATCACTTATAAATACGCTTTTAGTTGTTCAAACAAGTGGGACCACCTCT
+CTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATTCAACA
+ATTTATTGGAAACAGCGTTTTAGAGCCAAATAAAATTGGTCAGTCGCCATCGGATGTTTATTCTTTTAAT
+CGAAATAATGAAACTTTTTTTCTTAAGCGATCTAGCACTTTATATACAGAGACCACATACAGTGTCTCTC
+GCGAAGCGAAAATGTTGAGTTGGCTCTCTGAGAAATTAAAGGTGCCTGAACTCATCATGACTTTTCAGGA
+TGAGCAGTTTGAATTAATGATCACTAAAGCGATCAATGCAAAACCAATTTCAGCGCTTTTTTTAACAGAC
+CAAGAATTGCTTGCTATCTATAAGGAGGCACTCAATCTGTTAAATTCAGTTGCTATTATTGATTGTCCAT
+TTATTTCAAACATTGATCATCGGTTAAAAGAGTCAAAATTTTTTATTGATAACCAACTCCTTGACGATAT
+AGATCAAGATGATTTTGACGCTGAATTATGGGGAGACCATAGAACTTACCTAAGTCTATGGAATGAGTTA
+ACTGAGACTCGTGTTGAAGAAAGATTGGTTTTTTCTCATGGCGATATCACGGATAGTAATATTTTTATAG
+ATAAATTCAATGAAATTTACTTTTTAGATCTTGGCCGTGCTGGGTTAGCTGATGAATTTGTAGATATATC
+CTTTGTTGAACGTTGCCTAAGAGAGGATGCCTCGGAGGAAACTGCTAAAATATTTTTAAAGCATTTAAAA
+AATGATAGACCTGACAAAAGGAATTATTTTTTAAAACTTGATGAATTGAATTGATTCTAAGCATTATCTA
+AAAATACTTAATTGTCTTTTAACGTCGCTAAATTTTAAATAAATAAGTGAAGAGTGTTAGTGGAGCCACT
+GATTTAAAGTTGGCAGAGTAAAACTTGAAGTGCGACATAAACCACCTAATTAATTTAAAGGGTTTATGGA
+GTATATAAAATTGTCATACCATCATCTTAACTTTGAAGATCGTACTGCATTAATGCTTGAGTCAAGAAAA
+GAAGGCTTTTCAGCCAGAAAATTTGCTGAACTCATTAAAAGACATCCTAGTACGATCTATCGTGAGCTTA
+AAAGAAATAGCATCAATGACGTTTATCAAGCTCGATATGCTTCTGATAACACCTTCGCTAGACGTAGACG
+TGGTCACAGAAAACTCAAAATCGATTCAATCCTCTGGAAATTTATTGTTGAAGCGATCCGTTGTTTATGG
+TCTCCTCAGCAAATAGCAAAGCGTTTAAAGACATTTCCTGATTTGGATCAAACAATGAATGTAAGCCATA
+CAACGATTTATTCAACGATACGAGCATTACCAAAGGGTGAGTTGAAAAAAGACTTATTATCCTGTCTACG
+TCATGAAAATAAAAAGCGAAAAGCTAACGGTGAACCTAAAAAAGATTCTATATTACAGGATATTAAAACT
+ATTCATGAGCGCCCAGCCGAAGTTCAAGAAAGAAAAATACCGGGTCATTGGGAAGCTGATTTAATTAAAG
+GTAAAGACAATAAAAGTTCGATAGCAACACTTATTGAACGAAATACACGGCTCTGTATCTTGGCAACATT
+ACCTGATGCAAAGGCAGAATCAGTGCGCAAGGCTTTAACTGAAGCTCTGAAATATTTACCTGCAGAACTG
+CGTAAAACGTTGACCTATGACCGTGGACGTGAGATGTCAGAACATAAAATACTCGAAGAAGATTTAGGCA
+TAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAAT
+TAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATG
+TCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTG
+ATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAAC
+CAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTT
+GATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTG
+AGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTG
+GCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTT
+CGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGG
+ACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGG
+TGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTA
+TGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTC
+GCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCC
+CCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGAT
+CAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCG
+CGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCG
+CCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAA
+TCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAG
+GATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACC
+CAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGC
+GGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTG
+AGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATG
+CCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGG
+ACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGAC
+ATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGAC
+GCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGT
+TGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGC
+CGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGC
+GGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAG
+GCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAG
+CGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTG
+GCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCG
+GGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCG
+CTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGT
+GGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGG
+GTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGA
+TAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGC
+GCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCAC
+ATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGC
+AGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGG
+TCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGC
+GCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAAT
+TCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCG
+AGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGT
+GGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTC
+GCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTG
+GCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGA
+CCGCGAGCAGCTCCGGGAGTTCGTAGGGATATAGTATTTTATTAAATTCTTATGGGAAATGACGAATGTT
+AAATTATCTTAAGAGCTTTAATAATATCAATACTTATTTGATTTTATCGATAATTCTGCTGTTAATCATA
+ATATCTCTAGATTATTTCTAAACTGAATGAATGTTTATAATGAGTGATTCATATTGCTATTGAAATCGCC
+TTCTCACTTTGAAAGAAGGCGAGGATGAGGGACTTTTATGTTGAATTATCATTTTAAAAATGCCTTATAA
+AAGAAGCTTAATGTGTTTTCTTATATAGGTTTAAACATAATTGTTGTATATCTTAAATCCAATTGATCTT
+AAAATTTTCCTTTATTTTTTGTTATGAGTGCGAGAAAATTGTCAAAAAGGTCAATCAGACTGGGCGTTAA
+TTTGTTTTGCATACTTTTTCCTATATCGAATTAAAGTCATATAACTAACACCATAATCTTTAGCTATTTG
+AGTGAAAGGGTATGAATCGTCCTTATTTTTAAGGGTATGAATTAACTCTTTTAGTTTTTCTTCTGTAATC
+GCAGGCGATCTTCCCTTGTATTTACCTTTCTTTTTTTTAGCTAATTTAATTCCCTCTGCTTGATTCTCAC
+TAATAATACCCCTTTCAAGTTCAGCTACAGCGCCTAATACATGGAGTTGAAACTTATCGAACTTGTCATC
+TGAATTGGGGGTAAAGTTCAGGTTATTTTTGACAATATGAACAGACACTCCTTTTTTATTTAGCTTTTGA
+ACAATGGTTACAAGGTCAATCAAGCTACGTGCCAATCTAAAAACATCATGAGCGTACACAATGTCCCCAC
+TACGGACATAATCGAACATTTCCTGAAGTGCAGGGCGTTTGGCAGTCTTTCCGCTAAAATGATCAATAAA
+AGTTTTATCTAGCTCAAAGGGTAGATCATGGAGCTGTCTTTCAGGGTTTTGGTCTTTAGTGGATACACGG
+ATATACCCCACTCTTTGAAAGGGTGTGTTTTTAATTTGATCTTCAATATCTAAATTTTCTTTTTCCATAA
+CCAGTATAACAAAATTAGATAACCTCAATGTTATATCACATTAGATTAACAAAACAACCCTATTGTTATA
+GGGTTTTTAGGGTGTATTATTATATAACAATAGGGTATACCCTATTGTTATATATCTTCAGGTATAAGGA
+AAAATAACGATGATTAATTTTAATGATCTAAGCGAATCTGAATTATTAAGGATTGCACAGACTGGCATAT
+CAAACCGTATAGGATTGCGTACTTCAGGACATTG
+
diff -r d56b4f743779 -r 826ddf832bef test-data/outdir/reference_plasmid.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outdir/reference_plasmid.fasta Wed Nov 06 13:52:40 2019 -0500
@@ -0,0 +1,96 @@
+>JQ739157.2 Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete sequence
+AAGCTTATAGCAGTGTCACAGATGCGAAAAAGCAATTAAGTGCATATTTTGAGTTTTATAATTTGAAACG
+ACCTCATTCGAGTCTAGACAAAATGACACCAAATGAGTTTTACTATGATCAGCTACCCCAACAAAACAAG
+GTGGCTTAACTAGAGCGGAATATCACTTATAAATACGCTTTTAGTTGTTCAAACAAGTGGGACCACCTCT
+CTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATTCAACA
+ATTTATTGGAAACAGCGTTTTAGAGCCAAATAAAATTGGTCAGTCGCCATCGGATGTTTATTCTTTTAAT
+CGAAATAATGAAACTTTTTTTCTTAAGCGATCTAGCACTTTATATACAGAGACCACATACAGTGTCTCTC
+GCGAAGCGAAAATGTTGAGTTGGCTCTCTGAGAAATTAAAGGTGCCTGAACTCATCATGACTTTTCAGGA
+TGAGCAGTTTGAATTAATGATCACTAAAGCGATCAATGCAAAACCAATTTCAGCGCTTTTTTTAACAGAC
+CAAGAATTGCTTGCTATCTATAAGGAGGCACTCAATCTGTTAAATTCAGTTGCTATTATTGATTGTCCAT
+TTATTTCAAACATTGATCATCGGTTAAAAGAGTCAAAATTTTTTATTGATAACCAACTCCTTGACGATAT
+AGATCAAGATGATTTTGACGCTGAATTATGGGGAGACCATAGAACTTACCTAAGTCTATGGAATGAGTTA
+ACTGAGACTCGTGTTGAAGAAAGATTGGTTTTTTCTCATGGCGATATCACGGATAGTAATATTTTTATAG
+ATAAATTCAATGAAATTTACTTTTTAGATCTTGGCCGTGCTGGGTTAGCTGATGAATTTGTAGATATATC
+CTTTGTTGAACGTTGCCTAAGAGAGGATGCCTCGGAGGAAACTGCTAAAATATTTTTAAAGCATTTAAAA
+AATGATAGACCTGACAAAAGGAATTATTTTTTAAAACTTGATGAATTGAATTGATTCTAAGCATTATCTA
+AAAATACTTAATTGTCTTTTAACGTCGCTAAATTTTAAATAAATAAGTGAAGAGTGTTAGTGGAGCCACT
+GATTTAAAGTTGGCAGAGTAAAACTTGAAGTGCGACATAAACCACCTAATTAATTTAAAGGGTTTATGGA
+GTATATAAAATTGTCATACCATCATCTTAACTTTGAAGATCGTACTGCATTAATGCTTGAGTCAAGAAAA
+GAAGGCTTTTCAGCCAGAAAATTTGCTGAACTCATTAAAAGACATCCTAGTACGATCTATCGTGAGCTTA
+AAAGAAATAGCATCAATGACGTTTATCAAGCTCGATATGCTTCTGATAACACCTTCGCTAGACGTAGACG
+TGGTCACAGAAAACTCAAAATCGATTCAATCCTCTGGAAATTTATTGTTGAAGCGATCCGTTGTTTATGG
+TCTCCTCAGCAAATAGCAAAGCGTTTAAAGACATTTCCTGATTTGGATCAAACAATGAATGTAAGCCATA
+CAACGATTTATTCAACGATACGAGCATTACCAAAGGGTGAGTTGAAAAAAGACTTATTATCCTGTCTACG
+TCATGAAAATAAAAAGCGAAAAGCTAACGGTGAACCTAAAAAAGATTCTATATTACAGGATATTAAAACT
+ATTCATGAGCGCCCAGCCGAAGTTCAAGAAAGAAAAATACCGGGTCATTGGGAAGCTGATTTAATTAAAG
+GTAAAGACAATAAAAGTTCGATAGCAACACTTATTGAACGAAATACACGGCTCTGTATCTTGGCAACATT
+ACCTGATGCAAAGGCAGAATCAGTGCGCAAGGCTTTAACTGAAGCTCTGAAATATTTACCTGCAGAACTG
+CGTAAAACGTTGACCTATGACCGTGGACGTGAGATGTCAGAACATAAAATACTCGAAGAAGATTTAGGCA
+TAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAAT
+TAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATG
+TCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTG
+ATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAAC
+CAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTT
+GATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTG
+AGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTG
+GCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTT
+CGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGG
+ACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGG
+TGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTA
+TGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTC
+GCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCC
+CCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGAT
+CAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCG
+CGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCG
+CCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAA
+TCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAG
+GATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACC
+CAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGC
+GGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTG
+AGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATG
+CCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGG
+ACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGAC
+ATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGAC
+GCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGT
+TGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGC
+CGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGC
+GGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAG
+GCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAG
+CGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTG
+GCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCG
+GGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCG
+CTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGT
+GGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGG
+GTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGA
+TAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGC
+GCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCAC
+ATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGC
+AGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGG
+TCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGC
+GCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAAT
+TCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCG
+AGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGT
+GGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTC
+GCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTG
+GCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGA
+CCGCGAGCAGCTCCGGGAGTTCGTAGGGATATAGTATTTTATTAAATTCTTATGGGAAATGACGAATGTT
+AAATTATCTTAAGAGCTTTAATAATATCAATACTTATTTGATTTTATCGATAATTCTGCTGTTAATCATA
+ATATCTCTAGATTATTTCTAAACTGAATGAATGTTTATAATGAGTGATTCATATTGCTATTGAAATCGCC
+TTCTCACTTTGAAAGAAGGCGAGGATGAGGGACTTTTATGTTGAATTATCATTTTAAAAATGCCTTATAA
+AAGAAGCTTAATGTGTTTTCTTATATAGGTTTAAACATAATTGTTGTATATCTTAAATCCAATTGATCTT
+AAAATTTTCCTTTATTTTTTGTTATGAGTGCGAGAAAATTGTCAAAAAGGTCAATCAGACTGGGCGTTAA
+TTTGTTTTGCATACTTTTTCCTATATCGAATTAAAGTCATATAACTAACACCATAATCTTTAGCTATTTG
+AGTGAAAGGGTATGAATCGTCCTTATTTTTAAGGGTATGAATTAACTCTTTTAGTTTTTCTTCTGTAATC
+GCAGGCGATCTTCCCTTGTATTTACCTTTCTTTTTTTTAGCTAATTTAATTCCCTCTGCTTGATTCTCAC
+TAATAATACCCCTTTCAAGTTCAGCTACAGCGCCTAATACATGGAGTTGAAACTTATCGAACTTGTCATC
+TGAATTGGGGGTAAAGTTCAGGTTATTTTTGACAATATGAACAGACACTCCTTTTTTATTTAGCTTTTGA
+ACAATGGTTACAAGGTCAATCAAGCTACGTGCCAATCTAAAAACATCATGAGCGTACACAATGTCCCCAC
+TACGGACATAATCGAACATTTCCTGAAGTGCAGGGCGTTTGGCAGTCTTTCCGCTAAAATGATCAATAAA
+AGTTTTATCTAGCTCAAAGGGTAGATCATGGAGCTGTCTTTCAGGGTTTTGGTCTTTAGTGGATACACGG
+ATATACCCCACTCTTTGAAAGGGTGTGTTTTTAATTTGATCTTCAATATCTAAATTTTCTTTTTCCATAA
+CCAGTATAACAAAATTAGATAACCTCAATGTTATATCACATTAGATTAACAAAACAACCCTATTGTTATA
+GGGTTTTTAGGGTGTATTATTATATAACAATAGGGTATACCCTATTGTTATATATCTTCAGGTATAAGGA
+AAAATAACGATGATTAATTTTAATGATCTAAGCGAATCTGAATTATTAAGGATTGCACAGACTGGCATAT
+CAAACCGTATAGGATTGCGTACTTCAGGACATTG
+