Mercurial > repos > dfornika > match_plasmid_to_reference
changeset 4:826ddf832bef draft default tip
"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/match_plasmid_to_reference commit dcdac86bce5c44043516fbd472ab7c19d7bf4d50-dirty"
author | dfornika |
---|---|
date | Wed, 06 Nov 2019 13:52:40 -0500 |
parents | d56b4f743779 |
children | |
files | match_plasmid_to_reference.py match_plasmid_to_reference.xml test-data/CP008719.fa test-data/JQ739157.fa test-data/outdir/reference_plasmid.fasta |
diffstat | 5 files changed, 291 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/match_plasmid_to_reference.py Wed Nov 06 01:20:36 2019 -0500 +++ b/match_plasmid_to_reference.py Wed Nov 06 13:52:40 2019 -0500 @@ -1,6 +1,6 @@ #!/usr/bin/env python -from __future__ import print_function +from __future__ import print_function, division import argparse import csv @@ -56,34 +56,53 @@ mob_typer_report.append(row) return mob_typer_report -def parse_genbank_accession(genbank_file_path): - with open(genbank_file_path, 'r') as f: +def parse_genbank_accession(genbank_path): + with open(genbank_path, 'r') as f: while True: line = f.readline() - # break while statement if it is not a comment line - # i.e. does not startwith # if line.startswith('ACCESSION'): return line.strip().split()[1] +def parse_fasta_accession(fasta_path): + with open(fasta_path, 'r') as f: + while True: + line = f.readline() + if line.startswith('>'): + return line.strip().split()[0][1:] -def count_contigs(plasmid_fasta_path): +def count_fasta_contigs(fasta_path): contigs = 0 - with open(plasmid_fasta_path, 'r') as f: + with open(fasta_path, 'r') as f: for line in f: if line.startswith('>'): contigs += 1 return contigs -def count_bases(plasmid_fasta_path): +def count_fasta_bases(fasta_path): bases = 0 - with open(plasmid_fasta_path, 'r') as f: + with open(fasta_path, 'r') as f: for line in f: line = line.strip() if not line.startswith('>'): bases += len(line) return bases +def compute_fasta_gc_percent(fasta_path): + gc_count = 0 + total_bases_count = 0 + with open(fasta_path, 'r') as f: + for line in f: + if not line.startswith('>'): + line = line.strip() + line_c_count = line.count('c') + line.count('C') + line_g_count = line.count('g') + line.count('G') + line_total_bases_count = len(line) + gc_count += line_c_count + line_g_count + total_bases_count += line_total_bases_count + return 100 * (gc_count / total_bases_count) + def main(args): + # create output directory try: os.mkdir(args.outdir) @@ -95,18 +114,29 @@ # parse mob_typer report mob_typer_report = parse_mob_typer_report(args.mob_typer_report) - num_plasmid_contigs = count_contigs(args.plasmid) - num_plasmid_bases = count_bases(args.plasmid) - + num_plasmid_contigs = count_fasta_contigs(args.plasmid) + num_plasmid_bases = count_fasta_bases(args.plasmid) + plasmid_gc_percent = compute_fasta_gc_percent(args.plasmid) + with open(os.path.join(args.outdir, 'mob_typer_record.tsv'), 'w') as f: mob_typer_record_writer = csv.DictWriter(f, delimiter="\t", quotechar='"', fieldnames=MOB_TYPER_FIELDNAMES) mob_typer_record_writer.writeheader() for record in mob_typer_report: - if num_plasmid_contigs == int(record['num_contigs']) and num_plasmid_bases == int(record['total_length']): - for reference_plasmid in args.reference_plasmids: + # match the plasmid against three properties in the MOB-Typer report: + # 1. number of contigs + # 2. total length of all contigs + # 3. G/C percent (within +/-0.1%) + if num_plasmid_contigs == int(record['num_contigs']) and \ + num_plasmid_bases == int(record['total_length']) and \ + abs(plasmid_gc_percent - float(record['gc'])) < 0.1: + for reference_plasmid in args.reference_plasmids_genbank: if parse_genbank_accession(reference_plasmid) == record['mash_nearest_neighbor']: shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.gbk")) - mob_typer_record_writer.writerow(record) + + for reference_plasmid in args.reference_plasmids_fasta: + if re.match(record['mash_nearest_neighbor'], parse_fasta_accession(reference_plasmid)) is not None: + shutil.copy2(reference_plasmid, os.path.join(args.outdir, "reference_plasmid.fasta")) + mob_typer_record_writer.writerow(record) shutil.copy2(args.plasmid, os.path.join(args.outdir, "plasmid.fasta")) @@ -114,7 +144,8 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--plasmid", help="plasmid assembly (fasta)") - parser.add_argument("--reference_plasmids", nargs='+', help="reference plasmids (genbank)") + parser.add_argument("--reference_plasmids_genbank", nargs='+', help="reference plasmids (genbank)") + parser.add_argument("--reference_plasmids_fasta", nargs='+', help="reference plasmids (fasta)") parser.add_argument("--mob_typer_report", help="mob_typer reports (tsv)") parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory") args = parser.parse_args()
--- a/match_plasmid_to_reference.xml Wed Nov 06 01:20:36 2019 -0500 +++ b/match_plasmid_to_reference.xml Wed Nov 06 13:52:40 2019 -0500 @@ -5,8 +5,12 @@ <command detect_errors="exit_code"><![CDATA[ '$__tool_directory__/match_plasmid_to_reference.py' --plasmid '${plasmid}' - --reference_plasmids - #for $reference_plasmid in $reference_plasmids: + --reference_plasmids_genbank + #for $reference_plasmid in $reference_plasmids_genbank: + '${reference_plasmid}' + #end for + --reference_plasmids_fasta + #for $reference_plasmid in $reference_plasmids_fasta: '${reference_plasmid}' #end for --mob_typer_report '${concatenated_mob_typer_reports}' @@ -14,26 +18,35 @@ ]]></command> <inputs> <param name="plasmid" type="data" format="fasta" /> - <param name="reference_plasmids" type="data_collection" collection_type="list" format="genbank" /> + <param name="reference_plasmids_genbank" type="data_collection" collection_type="list" format="genbank" /> + <param name="reference_plasmids_fasta" type="data_collection" collection_type="list" format="fasta" /> <param name="concatenated_mob_typer_reports" type="data" format="tabular" /> </inputs> <outputs> <data name="output_plasmid" from_work_dir="outdir/plasmid.fasta" label="Plasmid" format="fasta"/> - <data name="matched_reference_plasmid" from_work_dir="outdir/reference_plasmid.gbk" label="Reference Plasmid" format="genbank"/> + <data name="matched_reference_plasmid_genbank" from_work_dir="outdir/reference_plasmid.gbk" label="Reference Plasmid (genbank)" format="genbank"/> + <data name="matched_reference_plasmid_fasta" from_work_dir="outdir/reference_plasmid.fasta" label="Reference Plasmid (fasta)" format="fasta"/> <data name="matched_mob_typer_record" from_work_dir="outdir/mob_typer_record.tsv" label="Matched MOB-Typer Record" format="tabular"/> </outputs> <tests> <test> <param name="plasmid" value="SRR9113487_plasmid_2719.fasta"/> - <param name="reference_plasmids"> + <param name="reference_plasmids_genbank"> <collection type="list"> <element name="CP008719" value="CP008719.gbk" ftype="genbank" /> <element name="JQ739157" value="JQ739157.gbk" ftype="genbank" /> </collection> </param> + <param name="reference_plasmids_fasta"> + <collection type="list"> + <element name="CP008719" value="CP008719.fa" ftype="fasta" /> + <element name="JQ739157" value="JQ739157.fa" ftype="fasta" /> + </collection> + </param> <param name="concatenated_mob_typer_reports" value="concatenated_mob_typer_reports.tsv" /> <output name="output_plasmid" file="outdir/plasmid.fasta" /> - <output name="matched_reference_plasmid" file="outdir/reference_plasmid.gbk" /> + <output name="matched_reference_plasmid_genbank" file="outdir/reference_plasmid.gbk" /> + <output name="matched_reference_plasmid_fasta" file="outdir/reference_plasmid.fasta" /> <output name="matched_mob_typer_record" file="outdir/mob_typer_record.tsv" /> </test> </tests>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/CP008719.fa Wed Nov 06 13:52:40 2019 -0500 @@ -0,0 +1,33 @@ +>CP008719.1 Escherichia coli strain ST648 plasmid pEC648_5, complete sequence +TAGATTTAAACGGTATCAAGTTTGGATTTTTAAGAACGCATTCTTAGTTCTGGAAAAGAGCCAGCGGCAG +GCTGAGGTGATAGGTACGAGATTGCATGCAATCTCTAGTGCTCTGTCTATCCTGCATTATCCTCAGCATT +ATCCTCAGCATTATCCTCAGCCTTGCCAACTCGACACCAATGCAGGATAGACAATCCGATGTCAAATGTT +AACACTCTGCGAGTGGTACATTTTCCCCGGATTATCGTCCTGAGCCTGCCGCTGGCTCTCTTTCTACCGC +CTCGCTTTGCTCGTTGCTCAACGCCTCACAGACACGGATTAAAATCCGCATCCGTTCACCGTTTTTTAAA +GTCCGTTAAAAGCATGATGCCATCTCCGAGAGTTAATCTCGTCAAATGCTAAATCGTGGGGGTCCCCTTT +GGGGTTCCGATTTAGTGATTGACGACACCACCGATTAAAAAACTTATGCGGGGTGGATGGTTTCACGAAG +TGAGGCCATCCACCTGTAAGACAGGGTTTTGTTTTTATTCCCTGTTTTGGTGATCGGGTGTGTGGAAAAG +GTTGGGGTAAGCCGTTCGGGGGTGCTTGTTTTGGGGGGTTAAAATTGTGGTTATTTTTTGCGCAATTCTC +GCGCGTGATCCTTGTATTTATACTTAAGGGATAAATGGCGGATATGAAATAGTGGTTTAGCCCAGTAATG +ACGAGGCTTTGAGTGGGTTTTGACAGGTCAAAGAAAATGGAGCAGAATTGAGGCGTTTTTAATCGGCGTT +GGGGAGTGCGTCAACACTCCCCAACATTTCGAATGTGTCACCTCAGCGGCAAACTCTGGTGACATGTACT +GGCTCGCAATGCACAGGTACGTGATGAATATACCACATCAAATCACAGCCTGCCCAGATCGGAGCAGGCT +TAATGTCAGAAGATAAATTCCTTTCGGACTACAGCCCCCGTGATGCAGTTTGGGATACCCAGCGCACGCT +TACCGATTCTGTCGGGGGTATCTACCAGACTGCTGCTGAATTCGAGCGCTATGCACTCCGTATGGCCTCC +TGTAGCGGTTTGTTACGTTTTGGTTGGTCTACCATCATGGAAACCGGAGAAACGCGCCTACGGCTTCGTA +GTGCGCAATTTTGCCGTGTCCGTCATTGCCCTGTCTGCCAGTGGAGAAGAACCCTCATGTGGCAAGCCCG +TTTTTATCAGGCTCTACCGAAAATCGTTGTGGATTACCCGTCTTCCCGATGGTTGTTTCTGACGTTAACT +GTCAGGAACTGCGAGATAGGTGAACTTGGAACAGTCCTTACAGCAATGAATGCGGCGTTTAAGCGAATGG +AAAAGCGAAAGGAGCTATCACCTGTTCAGGGGTGGATCAGGGCTACGGAGGTGACGCGAGGTAAGGATGG +CAGCGCACATCCGCATTTTCACTGTCTGCTGATGGTGCAACCTTCTTGGTTTAAAGGGAAGAACTACGTT +AAGCACGAACGTTGGGTAGAACTCTGGCGCGATTGCTTGCGGGTGAACTATGAGCCGAATATCGATATTC +GGGCAGTAAAAACTAAGACAGGTGAGGTTGTGGCCAACGTTGCCGAGCAACTGCAAAGCGCGGTTGCTGA +AACGCTGAAATACTCCGTTAAACCGGAAGATATGGCAAACGATCCTGAGTGGTTTCTTGAGCTGACGCGG +CAGCTTCACAAGCGCCGTTTTATCTCGACCGGTGGGGCGCTAAAAAACGTCCTCCAGTTGGATCGAGAAA +CCAATGAGGATCTTGTCATTGCCGACGATGTAGGGGATGGCACTGATGACGGGAAGCGGACGGCGTTTGT +CTGGGATTCAGGTAAACGGCGTTACAAACGCGCCCCTGAGAAGGATAAATCGGATTAACGTATGAATATT +AATATTGAATACCTGAATGGAAATAAGACTATTGGTTTATTTTTTTTAAGAAGTGAAGCGGTGATTCCTG +ACAGGTTTAAAAACCTTATTTTGCTTATTGATGGATTAAGTTTTGGCACATTTGGTTTTCATCCGCACGA +AGGTTTTGAGGATGAATTAATTTTATATATTCAGAAAACAAACGAGAGGGTAAAAACTCTTTTTGTGAAA +A +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/JQ739157.fa Wed Nov 06 13:52:40 2019 -0500 @@ -0,0 +1,96 @@ +>JQ739157.2 Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete sequence +AAGCTTATAGCAGTGTCACAGATGCGAAAAAGCAATTAAGTGCATATTTTGAGTTTTATAATTTGAAACG +ACCTCATTCGAGTCTAGACAAAATGACACCAAATGAGTTTTACTATGATCAGCTACCCCAACAAAACAAG +GTGGCTTAACTAGAGCGGAATATCACTTATAAATACGCTTTTAGTTGTTCAAACAAGTGGGACCACCTCT +CTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATTCAACA +ATTTATTGGAAACAGCGTTTTAGAGCCAAATAAAATTGGTCAGTCGCCATCGGATGTTTATTCTTTTAAT +CGAAATAATGAAACTTTTTTTCTTAAGCGATCTAGCACTTTATATACAGAGACCACATACAGTGTCTCTC +GCGAAGCGAAAATGTTGAGTTGGCTCTCTGAGAAATTAAAGGTGCCTGAACTCATCATGACTTTTCAGGA +TGAGCAGTTTGAATTAATGATCACTAAAGCGATCAATGCAAAACCAATTTCAGCGCTTTTTTTAACAGAC +CAAGAATTGCTTGCTATCTATAAGGAGGCACTCAATCTGTTAAATTCAGTTGCTATTATTGATTGTCCAT +TTATTTCAAACATTGATCATCGGTTAAAAGAGTCAAAATTTTTTATTGATAACCAACTCCTTGACGATAT +AGATCAAGATGATTTTGACGCTGAATTATGGGGAGACCATAGAACTTACCTAAGTCTATGGAATGAGTTA +ACTGAGACTCGTGTTGAAGAAAGATTGGTTTTTTCTCATGGCGATATCACGGATAGTAATATTTTTATAG +ATAAATTCAATGAAATTTACTTTTTAGATCTTGGCCGTGCTGGGTTAGCTGATGAATTTGTAGATATATC +CTTTGTTGAACGTTGCCTAAGAGAGGATGCCTCGGAGGAAACTGCTAAAATATTTTTAAAGCATTTAAAA +AATGATAGACCTGACAAAAGGAATTATTTTTTAAAACTTGATGAATTGAATTGATTCTAAGCATTATCTA +AAAATACTTAATTGTCTTTTAACGTCGCTAAATTTTAAATAAATAAGTGAAGAGTGTTAGTGGAGCCACT +GATTTAAAGTTGGCAGAGTAAAACTTGAAGTGCGACATAAACCACCTAATTAATTTAAAGGGTTTATGGA +GTATATAAAATTGTCATACCATCATCTTAACTTTGAAGATCGTACTGCATTAATGCTTGAGTCAAGAAAA +GAAGGCTTTTCAGCCAGAAAATTTGCTGAACTCATTAAAAGACATCCTAGTACGATCTATCGTGAGCTTA +AAAGAAATAGCATCAATGACGTTTATCAAGCTCGATATGCTTCTGATAACACCTTCGCTAGACGTAGACG +TGGTCACAGAAAACTCAAAATCGATTCAATCCTCTGGAAATTTATTGTTGAAGCGATCCGTTGTTTATGG +TCTCCTCAGCAAATAGCAAAGCGTTTAAAGACATTTCCTGATTTGGATCAAACAATGAATGTAAGCCATA +CAACGATTTATTCAACGATACGAGCATTACCAAAGGGTGAGTTGAAAAAAGACTTATTATCCTGTCTACG +TCATGAAAATAAAAAGCGAAAAGCTAACGGTGAACCTAAAAAAGATTCTATATTACAGGATATTAAAACT +ATTCATGAGCGCCCAGCCGAAGTTCAAGAAAGAAAAATACCGGGTCATTGGGAAGCTGATTTAATTAAAG +GTAAAGACAATAAAAGTTCGATAGCAACACTTATTGAACGAAATACACGGCTCTGTATCTTGGCAACATT +ACCTGATGCAAAGGCAGAATCAGTGCGCAAGGCTTTAACTGAAGCTCTGAAATATTTACCTGCAGAACTG +CGTAAAACGTTGACCTATGACCGTGGACGTGAGATGTCAGAACATAAAATACTCGAAGAAGATTTAGGCA +TAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAAT +TAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATG +TCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTG +ATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAAC +CAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTT +GATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTG +AGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTG +GCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTT +CGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGG +ACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGG +TGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTA +TGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTC +GCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCC +CCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGAT +CAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCG +CGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCG +CCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAA +TCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAG +GATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACC +CAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGC +GGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTG +AGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATG +CCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGG +ACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGAC +ATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGAC +GCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGT +TGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGC +CGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGC +GGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAG +GCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAG +CGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTG +GCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCG +GGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCG +CTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGT +GGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGG +GTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGA +TAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGC +GCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCAC +ATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGC +AGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGG +TCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGC +GCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAAT +TCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCG +AGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGT +GGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTC +GCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTG +GCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGA +CCGCGAGCAGCTCCGGGAGTTCGTAGGGATATAGTATTTTATTAAATTCTTATGGGAAATGACGAATGTT +AAATTATCTTAAGAGCTTTAATAATATCAATACTTATTTGATTTTATCGATAATTCTGCTGTTAATCATA +ATATCTCTAGATTATTTCTAAACTGAATGAATGTTTATAATGAGTGATTCATATTGCTATTGAAATCGCC +TTCTCACTTTGAAAGAAGGCGAGGATGAGGGACTTTTATGTTGAATTATCATTTTAAAAATGCCTTATAA +AAGAAGCTTAATGTGTTTTCTTATATAGGTTTAAACATAATTGTTGTATATCTTAAATCCAATTGATCTT +AAAATTTTCCTTTATTTTTTGTTATGAGTGCGAGAAAATTGTCAAAAAGGTCAATCAGACTGGGCGTTAA +TTTGTTTTGCATACTTTTTCCTATATCGAATTAAAGTCATATAACTAACACCATAATCTTTAGCTATTTG +AGTGAAAGGGTATGAATCGTCCTTATTTTTAAGGGTATGAATTAACTCTTTTAGTTTTTCTTCTGTAATC +GCAGGCGATCTTCCCTTGTATTTACCTTTCTTTTTTTTAGCTAATTTAATTCCCTCTGCTTGATTCTCAC +TAATAATACCCCTTTCAAGTTCAGCTACAGCGCCTAATACATGGAGTTGAAACTTATCGAACTTGTCATC +TGAATTGGGGGTAAAGTTCAGGTTATTTTTGACAATATGAACAGACACTCCTTTTTTATTTAGCTTTTGA +ACAATGGTTACAAGGTCAATCAAGCTACGTGCCAATCTAAAAACATCATGAGCGTACACAATGTCCCCAC +TACGGACATAATCGAACATTTCCTGAAGTGCAGGGCGTTTGGCAGTCTTTCCGCTAAAATGATCAATAAA +AGTTTTATCTAGCTCAAAGGGTAGATCATGGAGCTGTCTTTCAGGGTTTTGGTCTTTAGTGGATACACGG +ATATACCCCACTCTTTGAAAGGGTGTGTTTTTAATTTGATCTTCAATATCTAAATTTTCTTTTTCCATAA +CCAGTATAACAAAATTAGATAACCTCAATGTTATATCACATTAGATTAACAAAACAACCCTATTGTTATA +GGGTTTTTAGGGTGTATTATTATATAACAATAGGGTATACCCTATTGTTATATATCTTCAGGTATAAGGA +AAAATAACGATGATTAATTTTAATGATCTAAGCGAATCTGAATTATTAAGGATTGCACAGACTGGCATAT +CAAACCGTATAGGATTGCGTACTTCAGGACATTG +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outdir/reference_plasmid.fasta Wed Nov 06 13:52:40 2019 -0500 @@ -0,0 +1,96 @@ +>JQ739157.2 Acinetobacter pittii strain ABCA95 plasmid pABCA95, complete sequence +AAGCTTATAGCAGTGTCACAGATGCGAAAAAGCAATTAAGTGCATATTTTGAGTTTTATAATTTGAAACG +ACCTCATTCGAGTCTAGACAAAATGACACCAAATGAGTTTTACTATGATCAGCTACCCCAACAAAACAAG +GTGGCTTAACTAGAGCGGAATATCACTTATAAATACGCTTTTAGTTGTTCAAACAAGTGGGACCACCTCT +CTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTTGATGGAATTGCCCAATATTATTCAACA +ATTTATTGGAAACAGCGTTTTAGAGCCAAATAAAATTGGTCAGTCGCCATCGGATGTTTATTCTTTTAAT +CGAAATAATGAAACTTTTTTTCTTAAGCGATCTAGCACTTTATATACAGAGACCACATACAGTGTCTCTC +GCGAAGCGAAAATGTTGAGTTGGCTCTCTGAGAAATTAAAGGTGCCTGAACTCATCATGACTTTTCAGGA +TGAGCAGTTTGAATTAATGATCACTAAAGCGATCAATGCAAAACCAATTTCAGCGCTTTTTTTAACAGAC +CAAGAATTGCTTGCTATCTATAAGGAGGCACTCAATCTGTTAAATTCAGTTGCTATTATTGATTGTCCAT +TTATTTCAAACATTGATCATCGGTTAAAAGAGTCAAAATTTTTTATTGATAACCAACTCCTTGACGATAT +AGATCAAGATGATTTTGACGCTGAATTATGGGGAGACCATAGAACTTACCTAAGTCTATGGAATGAGTTA +ACTGAGACTCGTGTTGAAGAAAGATTGGTTTTTTCTCATGGCGATATCACGGATAGTAATATTTTTATAG +ATAAATTCAATGAAATTTACTTTTTAGATCTTGGCCGTGCTGGGTTAGCTGATGAATTTGTAGATATATC +CTTTGTTGAACGTTGCCTAAGAGAGGATGCCTCGGAGGAAACTGCTAAAATATTTTTAAAGCATTTAAAA +AATGATAGACCTGACAAAAGGAATTATTTTTTAAAACTTGATGAATTGAATTGATTCTAAGCATTATCTA +AAAATACTTAATTGTCTTTTAACGTCGCTAAATTTTAAATAAATAAGTGAAGAGTGTTAGTGGAGCCACT +GATTTAAAGTTGGCAGAGTAAAACTTGAAGTGCGACATAAACCACCTAATTAATTTAAAGGGTTTATGGA +GTATATAAAATTGTCATACCATCATCTTAACTTTGAAGATCGTACTGCATTAATGCTTGAGTCAAGAAAA +GAAGGCTTTTCAGCCAGAAAATTTGCTGAACTCATTAAAAGACATCCTAGTACGATCTATCGTGAGCTTA +AAAGAAATAGCATCAATGACGTTTATCAAGCTCGATATGCTTCTGATAACACCTTCGCTAGACGTAGACG +TGGTCACAGAAAACTCAAAATCGATTCAATCCTCTGGAAATTTATTGTTGAAGCGATCCGTTGTTTATGG +TCTCCTCAGCAAATAGCAAAGCGTTTAAAGACATTTCCTGATTTGGATCAAACAATGAATGTAAGCCATA +CAACGATTTATTCAACGATACGAGCATTACCAAAGGGTGAGTTGAAAAAAGACTTATTATCCTGTCTACG +TCATGAAAATAAAAAGCGAAAAGCTAACGGTGAACCTAAAAAAGATTCTATATTACAGGATATTAAAACT +ATTCATGAGCGCCCAGCCGAAGTTCAAGAAAGAAAAATACCGGGTCATTGGGAAGCTGATTTAATTAAAG +GTAAAGACAATAAAAGTTCGATAGCAACACTTATTGAACGAAATACACGGCTCTGTATCTTGGCAACATT +ACCTGATGCAAAGGCAGAATCAGTGCGCAAGGCTTTAACTGAAGCTCTGAAATATTTACCTGCAGAACTG +CGTAAAACGTTGACCTATGACCGTGGACGTGAGATGTCAGAACATAAAATACTCGAAGAAGATTTAGGCA +TAGATGTATATTTCTGTGACCCACATTCACCCTGGCAAAAAGGCACATGCGAAAATATGAATGGTTTAAT +TAGGCAATATTTACCTAAAGGGATTGATTTAAATCAGGCAGATCAGCATTATTTAAATCAAGTTGCCATG +TCACTGAATACTCGTCCTAGAAAGGCGTTAGATTGGCTTACACCATTAGAGAAATTTGCTCAGCTTGTTG +ATTATCATATGGCTTTTGAAACTGTCGCACCTCATGTTTGAATTCGCCCCATATTTTTGCTACAGTGAAC +CAAATTAAGATCATCTATTTACTAGGCCTCGCATTTGCGGGGTTTTTAATGCTGAATAAAAGGAAAACTT +GATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTG +AGCGGGTGCATGCCCGGTGAAATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTG +GCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAGCACACTTCCTATCTCGACATGCCGGGTTT +CGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGATACCGCCTGG +ACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGG +TGGTGACTCACGCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTA +TGCCAATGCGTTGTCGAACCAGCTTGCCCCGCAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTC +GCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTCAAGGTATTTTACCCCGGCC +CCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGAT +CAAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCG +CGCGCGTTTGGTGCGGCGTTCCCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCG +CCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGCTGAGCCATGGCTGACCACGTCACCCCCAA +TCTGCCATCGCGCGATTTCGATGTGACAGAGGCGTTTTATGCGAAGCTGGGCTTTGCGACGAGTTGGAAG +GATCGCGGCTGGATGATCCTGCAGCGCGGCGGTTTGCAGCTCGAATTCTTCCCCTATCCTGACCTCGACC +CAGCTACGAGCTCGTTCGGCTGTTGCCTGCGGTTGGATGATCTCGATGCCATGGTGGCATTGGTGAACGC +GGCGGGAGCCGAGGAAAAAAGCACCGGCTGGCCGCGCTTCAAAGCTCCGCAACTGGAGGCGAGCGGCCTG +AGGATCGGCTACCTGATCGATCCCGACTGCACGCTGGTGCGGCTGATCCAGAACCCCGACTGACCGCATG +CCCGCGAAAATCAAGATTTGCGGGATCAGCACACCCGAGGCGCTCGATGCGACCATCGCGGCGCGGGCGG +ACTATGCCGGGTTGGTGTTCTATCCAGCGTCGCCCCGTGCGGTTACGTCGAATGTCGCGGGCGCTTTGAC +ATCGCGCGCAGCTGGCCAGATCGCCATGGTCGGTTTGTTCGTCGATGCGGATGATGCTGTCATCGCCGAC +GCACTGGTGGCAGCCAAGCTGAACGCGCTGCAGCTGCACGGTTCGGAATCGCCCGAACGCGTGGCCCAGT +TGCGCGCGCGGTTTGGCAAGCCGGTGTGGAAGGCGCTGCCCGTCGCCAGCGCCAGCGATGTCGCACGCGC +CGCAGCCTATGCCGGGGCGGCGGACTTGATCTTGTTCGACGCCAAGACCCCCAAAGGCGCGCTGCCCGGC +GGCATGGGGTTGGCGTTCGACTGGTCGCTGCTGGCCGGATATCGCGGTGCCTTGCCGTGGGGGCTGGCAG +GCGGGCTAAATCCGACGAATGTTGCCGAGGCGATTGCGCGCACCGGAGCGCCGCTGGTCGATACCTCCAG +CGGCGTCGAAAGCGCGCCGGGCGTCAAGGATACCGACAAGATTACCAATTTCGCCTTTGCGGTGCGCTTG +GCCTAAATCGCGTCGATCAATAGGCGTCGTTCAGCGCAAAGATCGGCTTGCGGGTGCGCCACTGCCCTCG +GGTGAAGTCGGGAAAATCTAACGTGCGATTGCCCTCAGCAATCGATTGTTCCGACAGAGGCGTGATCGCG +CTCCAGGCCAGCGCGTCGTAAATGTCGATTGGCATCGGGGCCTTGGCCTTCAGCGCCTCGACAAAAGCGT +GGATCACGAACCAGTCCATCCCGCCATGCCCGGCCCCTGCCGCCAGATCGGCGTAGCGTTTCCATAGCGG +GTGATCGTATTTCGCAAACCAGCCCTCGGCAGGCTCCCAGCGGTGCGGCTGTGGGCTCTTGCCCTCCAGA +TAGATCGACTTGTTGACGTCCATCCACAGCCCCTCGGTGCCTTGCACCCGAAAGCCGAGAGAATAGGGGC +GCGGCAGCGAGGTGTCGTGGCACAGCATGATCGTTTCACCATTAGTGCAGCCGATCATGGTGTTGACCAC +ATCACCCAGTGCGAATTTCACCTCGGCGTTGGGATGATCGGCAGAGCCGTTCTTGACGACATAATCATGC +AGCCCGCGCGCCTTACAGCCGAAGCCGCCAGCGCCCGCTTCGCCCGGCAACGCGACCTTCAGGGTGCGGG +TCTGCGGCGGGTAGCACACGCCGGCATCGGCGCAGCCCTGGTACTTCACGGTCAGGGTGGTCGCGCTCGC +GCCGGCCGCGGGCGTGCCGGTGAGGGTGCCGAGCAATTCCTTGCGGTAGGTTTCGACGTCGCCGAAGAAT +TCGTCGCGGTAGGCCTTGCCCTTCGGCAGCGCCATGGTCGCGCCGGTGAAGGCGGCATCGGCCTTGACCG +AGGTGCGGTGCCGGTACAGGTAATAGCCGTCGGCGATCCGCCAGCGCACCTCGATGCGGTCCGGCGCGGT +GGCCTGCGCGGACAGGACGAAGACCTCGTCGACCGGCGGCAGTTCGAAGTCCTGGGCGACGGCCGAGGTC +GCGGGCAGCGCAAGCAGCAGGGCGAGCCCGGCCAGCCAGCGGCGCAGGCGGATCGTGGATGCGGTCATTG +GCTCAGTTTACCGGTCGGCTCTCGGCGGCCAGCCATTGCAGGTATTCGGGCAGGCCGGACGCGGCTTCGA +CCGCGAGCAGCTCCGGGAGTTCGTAGGGATATAGTATTTTATTAAATTCTTATGGGAAATGACGAATGTT +AAATTATCTTAAGAGCTTTAATAATATCAATACTTATTTGATTTTATCGATAATTCTGCTGTTAATCATA +ATATCTCTAGATTATTTCTAAACTGAATGAATGTTTATAATGAGTGATTCATATTGCTATTGAAATCGCC +TTCTCACTTTGAAAGAAGGCGAGGATGAGGGACTTTTATGTTGAATTATCATTTTAAAAATGCCTTATAA +AAGAAGCTTAATGTGTTTTCTTATATAGGTTTAAACATAATTGTTGTATATCTTAAATCCAATTGATCTT +AAAATTTTCCTTTATTTTTTGTTATGAGTGCGAGAAAATTGTCAAAAAGGTCAATCAGACTGGGCGTTAA +TTTGTTTTGCATACTTTTTCCTATATCGAATTAAAGTCATATAACTAACACCATAATCTTTAGCTATTTG +AGTGAAAGGGTATGAATCGTCCTTATTTTTAAGGGTATGAATTAACTCTTTTAGTTTTTCTTCTGTAATC +GCAGGCGATCTTCCCTTGTATTTACCTTTCTTTTTTTTAGCTAATTTAATTCCCTCTGCTTGATTCTCAC +TAATAATACCCCTTTCAAGTTCAGCTACAGCGCCTAATACATGGAGTTGAAACTTATCGAACTTGTCATC +TGAATTGGGGGTAAAGTTCAGGTTATTTTTGACAATATGAACAGACACTCCTTTTTTATTTAGCTTTTGA +ACAATGGTTACAAGGTCAATCAAGCTACGTGCCAATCTAAAAACATCATGAGCGTACACAATGTCCCCAC +TACGGACATAATCGAACATTTCCTGAAGTGCAGGGCGTTTGGCAGTCTTTCCGCTAAAATGATCAATAAA +AGTTTTATCTAGCTCAAAGGGTAGATCATGGAGCTGTCTTTCAGGGTTTTGGTCTTTAGTGGATACACGG +ATATACCCCACTCTTTGAAAGGGTGTGTTTTTAATTTGATCTTCAATATCTAAATTTTCTTTTTCCATAA +CCAGTATAACAAAATTAGATAACCTCAATGTTATATCACATTAGATTAACAAAACAACCCTATTGTTATA +GGGTTTTTAGGGTGTATTATTATATAACAATAGGGTATACCCTATTGTTATATATCTTCAGGTATAAGGA +AAAATAACGATGATTAATTTTAATGATCTAAGCGAATCTGAATTATTAAGGATTGCACAGACTGGCATAT +CAAACCGTATAGGATTGCGTACTTCAGGACATTG +