# HG changeset patch # User iuc # Date 1740765540 0 # Node ID a5231f824c47a214d1449c77154ee83f8774cd54 planemo upload for repository https://github.com/Syph-and-VPD-Lab/autoBIGS.cli commit 06d653e04139b58b7e9ba76ae31e74a31a31a3f3 diff -r 000000000000 -r a5231f824c47 autobigs-cli.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/autobigs-cli.xml Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,175 @@ + + Automated MLST typing + + macros.xml + 0.6.2 + 0 + + + + + autobigs-cli + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mode['operation'] == 'st' + + + mode['operation'] == 'info' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +@book{Deng2025RealYHD, +title = {Syph-and-VPD-Lab/autoBIGS.cli}, +url = {https://github.com/Syph-and-VPD-Lab/autoBIGS.cli}, +author = {Deng, Harrison}, +date = {2025-01-24}, +year = {2025}, +month = {1}, +day = {24}, +} + + + diff -r 000000000000 -r a5231f824c47 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,7 @@ + + + + AutoBIGS.CLI + + + \ No newline at end of file diff -r 000000000000 -r a5231f824c47 test-data/B3913_bpertussis_minimized_features.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B3913_bpertussis_minimized_features.fasta Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,133 @@ +>lcl|CP011447.1_gene_2762 [gene=adk] [locus_tag=B3913_2762] [location=2916440..2917096] [gbkey=Gene] +ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT +ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT +GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT +CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG +ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT +CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC +AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG +TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA +GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC +CGCCTGTCGCAGGCTCTGCAGAGCTAA +>lcl|CP011447.1_gene_253 [gene=fumC] [locus_tag=B3913_0253] [location=257438..258829] [gbkey=Gene] +ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC +AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT +GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC +GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC +AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT +GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC +GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA +AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA +TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC +GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG +GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT +CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG +AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG +GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC +CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG +GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC +TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT +CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC +GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC +TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG +>lcl|CP011447.1_gene_2963 [gene=glyA] [locus_tag=B3913_2963] [location=complement(3129365..3130612)] [gbkey=Gene] +ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG +TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA +GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC +GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC +AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT +GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC +AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG +AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC +GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC +GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC +CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT +CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG +TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC +TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG +CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG +GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG +CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC +CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA +>lcl|CP011447.1_gene_2473 [gene=icd] [locus_tag=B3913_2473] [location=complement(2605674..2606930)] [gbkey=Gene] +ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA +ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT +GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC +TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG +TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA +CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC +TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG +GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT +GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC +CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA +ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC +GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG +GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC +TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG +CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG +GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG +CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG +TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA +>lcl|CP011447.1_gene_2403 [gene=pepA] [locus_tag=B3913_2403] [location=2531836..2533335] [gbkey=Gene] +ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT +TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT +GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC +CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC +AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT +CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC +GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC +GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC +CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG +GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT +CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC +CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC +TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA +CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT +GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC +AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG +CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT +CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC +CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC +CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT +GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG +ATGCAGTACCTGCTGGACCAGGCAGGCTGA +>lcl|CP011447.1_gene_3165 [gene=pgm] [locus_tag=B3913_3165] [location=3355021..3356403] [gbkey=Gene] +GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG +ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT +GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA +GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG +GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT +GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG +CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG +TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA +ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC +CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC +TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC +CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC +GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG +GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT +CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC +TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC +TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC +CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC +AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG +AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA +>lcl|CP011447.1_gene_2110 [gene=tyrB] [locus_tag=B3913_2110] [location=2214524..2215726] [gbkey=Gene] +ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA +ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC +GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG +ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG +CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT +GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC +GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG +CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC +GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG +CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT +TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG +GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC +GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC +CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA +GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG +TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT +CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC +GGTGCTGAAGTAG \ No newline at end of file diff -r 000000000000 -r a5231f824c47 test-data/B3913_bpertussis_minimized_features_typed.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B3913_bpertussis_minimized_features_typed.csv Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,2 @@ +id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB +"('lcl|CP011447.1_gene_2762', 'lcl|CP011447.1_gene_253', 'lcl|CP011447.1_gene_2963', 'lcl|CP011447.1_gene_2473', 'lcl|CP011447.1_gene_2403', 'lcl|CP011447.1_gene_3165', 'lcl|CP011447.1_gene_2110')",2,ST-2 complex,1,1,1,1,1,1,3 diff -r 000000000000 -r a5231f824c47 test-data/B3921_bpertussis_minimized_features.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B3921_bpertussis_minimized_features.fasta Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,133 @@ +>lcl|CP011448.1_cds_ALH77808.1_2459 [gene=adk] [locus_tag=B3921_2764] [protein=adenylate kinase] [protein_id=ALH77808.1] [location=2918521..2919177] [gbkey=CDS] +ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT +ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT +GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT +CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG +ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT +CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC +AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG +TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA +GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC +CGCCTGTCGCAGGCTCTGCAGAGCTAA +>lcl|CP011448.1_cds_ALH75563.1_214 [gene=fumC] [locus_tag=B3921_0253] [protein=fumarate hydratase] [protein_id=ALH75563.1] [location=257428..258819] [gbkey=CDS] +ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC +AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT +GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC +GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC +AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT +GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC +GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA +AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA +TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC +GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG +GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT +CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG +AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG +GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC +CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG +GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC +TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT +CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC +GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC +TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG +>lcl|CP011448.1_cds_ALH77981.1_2632 [gene=glyA] [locus_tag=B3921_2965] [protein=serine hydroxymethyltransferase] [protein_id=ALH77981.1] [location=complement(3131372..3132619)] [gbkey=CDS] +ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG +TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA +GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC +GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC +AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT +GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC +AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG +AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC +GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC +GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC +CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT +CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG +TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC +TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG +CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG +GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG +CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC +CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA +>lcl|CP011448.1_cds_ALH77547.1_2198 [gene=icd] [locus_tag=B3921_2474] [protein=isocitrate dehydrogenase] [protein_id=ALH77547.1] [location=complement(2606706..2607962)] [gbkey=CDS] +ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA +ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT +GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC +TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG +TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA +CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC +TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG +GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT +GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC +CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA +ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC +GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG +GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC +TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG +CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG +GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG +CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG +TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA +>lcl|CP011448.1_cds_ALH77480.1_2131 [gene=pepA] [locus_tag=B3921_2404] [protein=leucyl aminopeptidase] [protein_id=ALH77480.1] [location=2532868..2534367] [gbkey=CDS] +ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT +TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT +GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC +CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC +AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT +CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC +GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC +GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC +CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG +GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT +CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC +CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC +TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA +CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT +GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC +AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG +CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT +CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC +CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC +CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT +GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG +ATGCAGTACCTGCTGGACCAGGCAGGCTGA +>lcl|CP011448.1_cds_ALH78163.1_2814 [gene=pgm] [locus_tag=B3921_3166] [protein=phosphoglucomutase] [protein_id=ALH78163.1] [location=3355979..3357361] [gbkey=CDS] +GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG +ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT +GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA +GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG +GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT +GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG +CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG +TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA +ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC +CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC +TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC +CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC +GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG +GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT +CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC +TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC +TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC +CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC +AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG +AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA +>lcl|CP011448.1_cds_ALH77215.1_1866 [gene=tyrB] [locus_tag=B3921_2112] [protein=aromatic amino acid aminotransferase] [protein_id=ALH77215.1] [location=2216606..2217808] [gbkey=CDS] +ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA +ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC +GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG +ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG +CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT +GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC +GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG +CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC +GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG +CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT +TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG +GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC +GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC +CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA +GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG +TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT +CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC +GGTGCTGAAGTAG \ No newline at end of file diff -r 000000000000 -r a5231f824c47 test-data/B3921_bpertussis_minimized_features_typed.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/B3921_bpertussis_minimized_features_typed.csv Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,2 @@ +id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB +"('lcl|CP011448.1_cds_ALH77808.1_2459', 'lcl|CP011448.1_cds_ALH75563.1_214', 'lcl|CP011448.1_cds_ALH77981.1_2632', 'lcl|CP011448.1_cds_ALH77547.1_2198', 'lcl|CP011448.1_cds_ALH77480.1_2131', 'lcl|CP011448.1_cds_ALH78163.1_2814', 'lcl|CP011448.1_cds_ALH77215.1_1866')",2,ST-2 complex,1,1,1,1,1,1,3 diff -r 000000000000 -r a5231f824c47 test-data/all_mlsts.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_mlsts.csv Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,4 @@ +id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB +"('lcl|CP011447.1_gene_2762', 'lcl|CP011447.1_gene_253', 'lcl|CP011447.1_gene_2963', 'lcl|CP011447.1_gene_2473', 'lcl|CP011447.1_gene_2403', 'lcl|CP011447.1_gene_3165', 'lcl|CP011447.1_gene_2110')",2,ST-2 complex,1,1,1,1,1,1,3 +"('lcl|BX640419.1_cds_CAE43044.1_2724', 'lcl|BX640411.1_cds_CAE40628.1_248', 'lcl|BX640420.1_cds_CAE43224.1_2904', 'lcl|BX640418.1_cds_CAE42760.1_2440', 'lcl|BX640418.1_cds_CAE42692.1_2372', 'lcl|BX640420.1_cds_CAE43408.1_3088', 'lcl|BX640416.1_cds_CAE42081.1_1761')",1,ST-2 complex,1,1,1,1,1,1,1 +"('lcl|CP011448.1_cds_ALH77808.1_2459', 'lcl|CP011448.1_cds_ALH75563.1_214', 'lcl|CP011448.1_cds_ALH77981.1_2632', 'lcl|CP011448.1_cds_ALH77547.1_2198', 'lcl|CP011448.1_cds_ALH77480.1_2131', 'lcl|CP011448.1_cds_ALH78163.1_2814', 'lcl|CP011448.1_cds_ALH77215.1_1866')",2,ST-2 complex,1,1,1,1,1,1,3 diff -r 000000000000 -r a5231f824c47 test-data/tohama_I_bpertussis_minimized_features.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tohama_I_bpertussis_minimized_features.fasta Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,133 @@ +>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS] +ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT +ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT +GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT +CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG +ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT +CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC +AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG +TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA +GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC +CGCCTGTCGCAGGCTCTGCAGAGCTAA +>lcl|BX640411.1_cds_CAE40628.1_248 [gene=fumC] [locus_tag=BP0248] [db_xref=GOA:Q7W0A2,InterPro:IPR000362,InterPro:IPR005677,InterPro:IPR008948,InterPro:IPR018951,InterPro:IPR020557,InterPro:IPR022761,InterPro:IPR024083] [protein=fumarate hydratase class II] [protein_id=CAE40628.1] [location=256543..257934] [gbkey=CDS] +ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC +AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT +GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC +GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC +AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT +GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC +GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA +AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA +TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC +GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG +GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT +CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG +AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG +GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC +CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG +GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC +TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT +CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC +GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC +TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG +>lcl|BX640420.1_cds_CAE43224.1_2904 [gene=glyA] [locus_tag=BP2952] [db_xref=GOA:Q7VUW7,InterPro:IPR001085,InterPro:IPR015421,InterPro:IPR015422,InterPro:IPR015424,InterPro:IPR019798] [protein=serine hydroxymethyltransferase] [protein_id=CAE43224.1] [location=complement(8611..9858)] [gbkey=CDS] +ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG +TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA +GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC +GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC +AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT +GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC +AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG +AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC +GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC +GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC +CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT +CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG +TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC +TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG +CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG +GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG +CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC +CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA +>lcl|BX640418.1_cds_CAE42760.1_2440 [gene=icd] [locus_tag=BP2488] [db_xref=GOA:Q7VVZ2,InterPro:IPR001804,InterPro:IPR004439,InterPro:IPR019818,InterPro:IPR024084,UniProtKB/TrEMBL:Q7VVZ2] [protein=isocitrate dehydrogenase [NADP]] [protein_id=CAE42760.1] [location=complement(204636..205892)] [gbkey=CDS] +ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA +ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT +GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC +TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG +TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA +CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC +TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG +GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT +GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC +CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA +ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC +GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG +GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC +TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG +CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG +GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG +CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG +TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA +>lcl|BX640418.1_cds_CAE42692.1_2372 [gene=pepA] [locus_tag=BP2421] [db_xref=GOA:Q7VW48,InterPro:IPR000819,InterPro:IPR008283,InterPro:IPR011356,InterPro:IPR023042] [protein=cytosol aminopeptidase] [protein_id=CAE42692.1] [location=131847..133346] [gbkey=CDS] +ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT +TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT +GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC +CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC +AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT +CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC +GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC +GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC +CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG +GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT +CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC +CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC +TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA +CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT +GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC +AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG +CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT +CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC +CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC +CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT +GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG +ATGCAGTACCTGCTGGACCAGGCAGGCTGA +>lcl|BX640420.1_cds_CAE43408.1_3088 [gene=pgm] [locus_tag=BP3141] [db_xref=GOA:Q7VUF5,InterPro:IPR005841,InterPro:IPR005843,InterPro:IPR005844,InterPro:IPR005845,InterPro:IPR005846,InterPro:IPR016055,InterPro:IPR016066,UniProtKB/TrEMBL:Q7VUF5] [protein=phosphoglucomutase] [protein_id=CAE43408.1] [location=217601..218983] [gbkey=CDS] +GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG +ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT +GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA +GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG +GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT +GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG +CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG +TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA +ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC +CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC +TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC +CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC +GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG +GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT +CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC +TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC +TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC +CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC +AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG +AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA +>lcl|BX640416.1_cds_CAE42081.1_1761 [gene=tyrB] [locus_tag=BP1795] [db_xref=GOA:Q7VXH5,InterPro:IPR000796,InterPro:IPR004838,InterPro:IPR004839,InterPro:IPR015421,InterPro:IPR015424,UniProtKB/TrEMBL:Q7VXH5] [protein=aromatic-amino-acid aminotransferase] [protein_id=CAE42081.1] [location=complement(151299..152501)] [gbkey=CDS] +ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA +ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC +GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG +ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG +CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT +GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC +GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG +CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC +GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG +CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT +TCGCCGCGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG +GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC +GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC +CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA +GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG +TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT +CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC +GGTGCTGAAGTAG \ No newline at end of file diff -r 000000000000 -r a5231f824c47 test-data/tohama_I_bpertussis_minimized_features_typed.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tohama_I_bpertussis_minimized_features_typed.csv Fri Feb 28 17:59:00 2025 +0000 @@ -0,0 +1,2 @@ +id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB +"('lcl|BX640419.1_cds_CAE43044.1_2724', 'lcl|BX640411.1_cds_CAE40628.1_248', 'lcl|BX640420.1_cds_CAE43224.1_2904', 'lcl|BX640418.1_cds_CAE42760.1_2440', 'lcl|BX640418.1_cds_CAE42692.1_2372', 'lcl|BX640420.1_cds_CAE43408.1_3088', 'lcl|BX640416.1_cds_CAE42081.1_1761')",1,ST-2 complex,1,1,1,1,1,1,1