# HG changeset patch
# User iuc
# Date 1740765540 0
# Node ID a5231f824c47a214d1449c77154ee83f8774cd54
planemo upload for repository https://github.com/Syph-and-VPD-Lab/autoBIGS.cli commit 06d653e04139b58b7e9ba76ae31e74a31a31a3f3
diff -r 000000000000 -r a5231f824c47 autobigs-cli.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/autobigs-cli.xml Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,175 @@
+
+ Automated MLST typing
+
+ macros.xml
+ 0.6.2
+ 0
+
+
+
+
+ autobigs-cli
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mode['operation'] == 'st'
+
+
+ mode['operation'] == 'info'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+@book{Deng2025RealYHD,
+title = {Syph-and-VPD-Lab/autoBIGS.cli},
+url = {https://github.com/Syph-and-VPD-Lab/autoBIGS.cli},
+author = {Deng, Harrison},
+date = {2025-01-24},
+year = {2025},
+month = {1},
+day = {24},
+}
+
+
+
diff -r 000000000000 -r a5231f824c47 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,7 @@
+
+
+
+ AutoBIGS.CLI
+
+
+
\ No newline at end of file
diff -r 000000000000 -r a5231f824c47 test-data/B3913_bpertussis_minimized_features.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3913_bpertussis_minimized_features.fasta Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,133 @@
+>lcl|CP011447.1_gene_2762 [gene=adk] [locus_tag=B3913_2762] [location=2916440..2917096] [gbkey=Gene]
+ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
+ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
+GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
+CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
+ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
+CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
+AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
+TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
+GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
+CGCCTGTCGCAGGCTCTGCAGAGCTAA
+>lcl|CP011447.1_gene_253 [gene=fumC] [locus_tag=B3913_0253] [location=257438..258829] [gbkey=Gene]
+ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
+AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
+GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
+GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
+AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
+GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
+GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
+AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
+TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
+GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
+GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
+CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
+AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
+GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
+CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
+GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
+TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
+CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
+GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
+TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
+>lcl|CP011447.1_gene_2963 [gene=glyA] [locus_tag=B3913_2963] [location=complement(3129365..3130612)] [gbkey=Gene]
+ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
+TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
+GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
+GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
+AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
+GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
+AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
+AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
+GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
+GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
+CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
+CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
+TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
+TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
+CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
+GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
+CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
+CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
+>lcl|CP011447.1_gene_2473 [gene=icd] [locus_tag=B3913_2473] [location=complement(2605674..2606930)] [gbkey=Gene]
+ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
+ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
+GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
+TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
+TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
+CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
+TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
+GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
+GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
+CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
+ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
+GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
+GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
+TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
+CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
+GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
+CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
+TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
+>lcl|CP011447.1_gene_2403 [gene=pepA] [locus_tag=B3913_2403] [location=2531836..2533335] [gbkey=Gene]
+ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
+TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
+GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
+CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
+AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
+CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
+GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
+GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
+CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
+GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
+CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
+CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
+TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
+CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
+GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
+AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
+CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
+CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
+CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
+CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
+GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
+ATGCAGTACCTGCTGGACCAGGCAGGCTGA
+>lcl|CP011447.1_gene_3165 [gene=pgm] [locus_tag=B3913_3165] [location=3355021..3356403] [gbkey=Gene]
+GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
+ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
+GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
+GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
+GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
+GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
+CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
+TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
+ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
+CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
+TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
+CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
+GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
+GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
+CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
+TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
+TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
+CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
+AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
+AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
+>lcl|CP011447.1_gene_2110 [gene=tyrB] [locus_tag=B3913_2110] [location=2214524..2215726] [gbkey=Gene]
+ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
+ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
+GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
+ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
+CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
+GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
+GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
+CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
+GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
+CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
+TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
+GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
+GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
+CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
+GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
+TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
+CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
+GGTGCTGAAGTAG
\ No newline at end of file
diff -r 000000000000 -r a5231f824c47 test-data/B3913_bpertussis_minimized_features_typed.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3913_bpertussis_minimized_features_typed.csv Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,2 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|CP011447.1_gene_2762', 'lcl|CP011447.1_gene_253', 'lcl|CP011447.1_gene_2963', 'lcl|CP011447.1_gene_2473', 'lcl|CP011447.1_gene_2403', 'lcl|CP011447.1_gene_3165', 'lcl|CP011447.1_gene_2110')",2,ST-2 complex,1,1,1,1,1,1,3
diff -r 000000000000 -r a5231f824c47 test-data/B3921_bpertussis_minimized_features.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3921_bpertussis_minimized_features.fasta Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,133 @@
+>lcl|CP011448.1_cds_ALH77808.1_2459 [gene=adk] [locus_tag=B3921_2764] [protein=adenylate kinase] [protein_id=ALH77808.1] [location=2918521..2919177] [gbkey=CDS]
+ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
+ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
+GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
+CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
+ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
+CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
+AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
+TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
+GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
+CGCCTGTCGCAGGCTCTGCAGAGCTAA
+>lcl|CP011448.1_cds_ALH75563.1_214 [gene=fumC] [locus_tag=B3921_0253] [protein=fumarate hydratase] [protein_id=ALH75563.1] [location=257428..258819] [gbkey=CDS]
+ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
+AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
+GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
+GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
+AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
+GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
+GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
+AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
+TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
+GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
+GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
+CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
+AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
+GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
+CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
+GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
+TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
+CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
+GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
+TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
+>lcl|CP011448.1_cds_ALH77981.1_2632 [gene=glyA] [locus_tag=B3921_2965] [protein=serine hydroxymethyltransferase] [protein_id=ALH77981.1] [location=complement(3131372..3132619)] [gbkey=CDS]
+ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
+TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
+GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
+GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
+AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
+GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
+AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
+AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
+GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
+GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
+CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
+CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
+TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
+TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
+CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
+GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
+CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
+CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
+>lcl|CP011448.1_cds_ALH77547.1_2198 [gene=icd] [locus_tag=B3921_2474] [protein=isocitrate dehydrogenase] [protein_id=ALH77547.1] [location=complement(2606706..2607962)] [gbkey=CDS]
+ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
+ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
+GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
+TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
+TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
+CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
+TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
+GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
+GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
+CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
+ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
+GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
+GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
+TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
+CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
+GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
+CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
+TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
+>lcl|CP011448.1_cds_ALH77480.1_2131 [gene=pepA] [locus_tag=B3921_2404] [protein=leucyl aminopeptidase] [protein_id=ALH77480.1] [location=2532868..2534367] [gbkey=CDS]
+ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
+TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
+GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
+CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
+AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
+CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
+GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
+GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
+CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
+GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
+CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
+CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
+TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
+CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
+GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
+AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
+CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
+CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
+CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
+CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
+GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
+ATGCAGTACCTGCTGGACCAGGCAGGCTGA
+>lcl|CP011448.1_cds_ALH78163.1_2814 [gene=pgm] [locus_tag=B3921_3166] [protein=phosphoglucomutase] [protein_id=ALH78163.1] [location=3355979..3357361] [gbkey=CDS]
+GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
+ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
+GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
+GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
+GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
+GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
+CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
+TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
+ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
+CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
+TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
+CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
+GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
+GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
+CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
+TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
+TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
+CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
+AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
+AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
+>lcl|CP011448.1_cds_ALH77215.1_1866 [gene=tyrB] [locus_tag=B3921_2112] [protein=aromatic amino acid aminotransferase] [protein_id=ALH77215.1] [location=2216606..2217808] [gbkey=CDS]
+ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
+ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
+GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
+ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
+CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
+GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
+GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
+CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
+GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
+CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
+TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
+GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
+GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
+CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
+GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
+TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
+CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
+GGTGCTGAAGTAG
\ No newline at end of file
diff -r 000000000000 -r a5231f824c47 test-data/B3921_bpertussis_minimized_features_typed.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3921_bpertussis_minimized_features_typed.csv Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,2 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|CP011448.1_cds_ALH77808.1_2459', 'lcl|CP011448.1_cds_ALH75563.1_214', 'lcl|CP011448.1_cds_ALH77981.1_2632', 'lcl|CP011448.1_cds_ALH77547.1_2198', 'lcl|CP011448.1_cds_ALH77480.1_2131', 'lcl|CP011448.1_cds_ALH78163.1_2814', 'lcl|CP011448.1_cds_ALH77215.1_1866')",2,ST-2 complex,1,1,1,1,1,1,3
diff -r 000000000000 -r a5231f824c47 test-data/all_mlsts.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_mlsts.csv Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,4 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|CP011447.1_gene_2762', 'lcl|CP011447.1_gene_253', 'lcl|CP011447.1_gene_2963', 'lcl|CP011447.1_gene_2473', 'lcl|CP011447.1_gene_2403', 'lcl|CP011447.1_gene_3165', 'lcl|CP011447.1_gene_2110')",2,ST-2 complex,1,1,1,1,1,1,3
+"('lcl|BX640419.1_cds_CAE43044.1_2724', 'lcl|BX640411.1_cds_CAE40628.1_248', 'lcl|BX640420.1_cds_CAE43224.1_2904', 'lcl|BX640418.1_cds_CAE42760.1_2440', 'lcl|BX640418.1_cds_CAE42692.1_2372', 'lcl|BX640420.1_cds_CAE43408.1_3088', 'lcl|BX640416.1_cds_CAE42081.1_1761')",1,ST-2 complex,1,1,1,1,1,1,1
+"('lcl|CP011448.1_cds_ALH77808.1_2459', 'lcl|CP011448.1_cds_ALH75563.1_214', 'lcl|CP011448.1_cds_ALH77981.1_2632', 'lcl|CP011448.1_cds_ALH77547.1_2198', 'lcl|CP011448.1_cds_ALH77480.1_2131', 'lcl|CP011448.1_cds_ALH78163.1_2814', 'lcl|CP011448.1_cds_ALH77215.1_1866')",2,ST-2 complex,1,1,1,1,1,1,3
diff -r 000000000000 -r a5231f824c47 test-data/tohama_I_bpertussis_minimized_features.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tohama_I_bpertussis_minimized_features.fasta Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,133 @@
+>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS]
+ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
+ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
+GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
+CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
+ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
+CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
+AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
+TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
+GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
+CGCCTGTCGCAGGCTCTGCAGAGCTAA
+>lcl|BX640411.1_cds_CAE40628.1_248 [gene=fumC] [locus_tag=BP0248] [db_xref=GOA:Q7W0A2,InterPro:IPR000362,InterPro:IPR005677,InterPro:IPR008948,InterPro:IPR018951,InterPro:IPR020557,InterPro:IPR022761,InterPro:IPR024083] [protein=fumarate hydratase class II] [protein_id=CAE40628.1] [location=256543..257934] [gbkey=CDS]
+ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
+AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
+GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
+GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
+AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
+GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
+GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
+AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
+TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
+GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
+GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
+CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
+AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
+GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
+CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
+GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
+TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
+CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
+GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
+TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
+>lcl|BX640420.1_cds_CAE43224.1_2904 [gene=glyA] [locus_tag=BP2952] [db_xref=GOA:Q7VUW7,InterPro:IPR001085,InterPro:IPR015421,InterPro:IPR015422,InterPro:IPR015424,InterPro:IPR019798] [protein=serine hydroxymethyltransferase] [protein_id=CAE43224.1] [location=complement(8611..9858)] [gbkey=CDS]
+ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
+TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
+GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
+GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
+AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
+GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
+AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
+AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
+GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
+GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
+CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
+CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
+TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
+TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
+CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
+GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
+CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
+CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
+>lcl|BX640418.1_cds_CAE42760.1_2440 [gene=icd] [locus_tag=BP2488] [db_xref=GOA:Q7VVZ2,InterPro:IPR001804,InterPro:IPR004439,InterPro:IPR019818,InterPro:IPR024084,UniProtKB/TrEMBL:Q7VVZ2] [protein=isocitrate dehydrogenase [NADP]] [protein_id=CAE42760.1] [location=complement(204636..205892)] [gbkey=CDS]
+ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
+ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
+GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
+TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
+TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
+CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
+TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
+GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
+GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
+CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
+ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
+GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
+GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
+TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
+CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
+GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
+CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
+TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
+>lcl|BX640418.1_cds_CAE42692.1_2372 [gene=pepA] [locus_tag=BP2421] [db_xref=GOA:Q7VW48,InterPro:IPR000819,InterPro:IPR008283,InterPro:IPR011356,InterPro:IPR023042] [protein=cytosol aminopeptidase] [protein_id=CAE42692.1] [location=131847..133346] [gbkey=CDS]
+ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
+TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
+GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
+CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
+AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
+CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
+GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
+GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
+CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
+GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
+CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
+CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
+TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
+CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
+GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
+AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
+CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
+CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
+CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
+CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
+GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
+ATGCAGTACCTGCTGGACCAGGCAGGCTGA
+>lcl|BX640420.1_cds_CAE43408.1_3088 [gene=pgm] [locus_tag=BP3141] [db_xref=GOA:Q7VUF5,InterPro:IPR005841,InterPro:IPR005843,InterPro:IPR005844,InterPro:IPR005845,InterPro:IPR005846,InterPro:IPR016055,InterPro:IPR016066,UniProtKB/TrEMBL:Q7VUF5] [protein=phosphoglucomutase] [protein_id=CAE43408.1] [location=217601..218983] [gbkey=CDS]
+GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
+ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
+GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
+GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
+GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
+GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
+CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
+TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
+ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
+CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
+TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
+CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
+GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
+GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
+CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
+TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
+TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
+CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
+AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
+AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
+>lcl|BX640416.1_cds_CAE42081.1_1761 [gene=tyrB] [locus_tag=BP1795] [db_xref=GOA:Q7VXH5,InterPro:IPR000796,InterPro:IPR004838,InterPro:IPR004839,InterPro:IPR015421,InterPro:IPR015424,UniProtKB/TrEMBL:Q7VXH5] [protein=aromatic-amino-acid aminotransferase] [protein_id=CAE42081.1] [location=complement(151299..152501)] [gbkey=CDS]
+ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
+ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
+GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
+ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
+CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
+GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
+GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
+CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
+GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
+CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
+TCGCCGCGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
+GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
+GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
+CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
+GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
+TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
+CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
+GGTGCTGAAGTAG
\ No newline at end of file
diff -r 000000000000 -r a5231f824c47 test-data/tohama_I_bpertussis_minimized_features_typed.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tohama_I_bpertussis_minimized_features_typed.csv Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,2 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|BX640419.1_cds_CAE43044.1_2724', 'lcl|BX640411.1_cds_CAE40628.1_248', 'lcl|BX640420.1_cds_CAE43224.1_2904', 'lcl|BX640418.1_cds_CAE42760.1_2440', 'lcl|BX640418.1_cds_CAE42692.1_2372', 'lcl|BX640420.1_cds_CAE43408.1_3088', 'lcl|BX640416.1_cds_CAE42081.1_1761')",1,ST-2 complex,1,1,1,1,1,1,1