changeset 0:a5231f824c47 draft default tip

planemo upload for repository https://github.com/Syph-and-VPD-Lab/autoBIGS.cli commit 06d653e04139b58b7e9ba76ae31e74a31a31a3f3
author iuc
date Fri, 28 Feb 2025 17:59:00 +0000
parents
children
files autobigs-cli.xml macros.xml test-data/B3913_bpertussis_minimized_features.fasta test-data/B3913_bpertussis_minimized_features_typed.csv test-data/B3921_bpertussis_minimized_features.fasta test-data/B3921_bpertussis_minimized_features_typed.csv test-data/all_mlsts.csv test-data/tohama_I_bpertussis_minimized_features.fasta test-data/tohama_I_bpertussis_minimized_features_typed.csv
diffstat 9 files changed, 591 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/autobigs-cli.xml	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,175 @@
+<tool id="autobigs-cli" name="autoBIGS.cli" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <description>Automated MLST typing</description>
+    <macros>
+        <import>macros.xml</import>
+        <token name="@TOOL_VERSION@">0.6.2</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <expand macro="bio_tools"/>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">autobigs-cli</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+            #if $mode.operation == "info":
+                autoBIGS info --retrieve-bigsdb-schemes '$database_origin.bigsdb' --csv '$info_schemes_out'
+            #else if $mode.operation == "st":
+                autoBIGS st --scheme-name '$mode.scheme' '$fasta' '$database_origin.bigsdb' '$mlst_profiles_output'
+            #end if
+    ]]></command>
+    <inputs>
+        <conditional name="database_origin">
+            <param name="origin_select" label="Database Origin" type="select" help="The origin of the database to be accessed" optional="false">
+                <option value="pubmlst" selected="true">PubMLST</option>
+                <option value="institutpasteur">Institut Pasteur</option>
+            </param>
+            <when value="pubmlst">
+                <param name="bigsdb" label="PubMLST Database" type="select" help="Select from a list of sequence definition databases hosted by pubMLST." optional="false">
+                    <options from_url="https://rest.pubmlst.org/db" request_method="GET">
+                        <postprocess_expression type="ecma5.1"><![CDATA[${
+                            let options = []
+                            for (let resourceGroup of inputs) {
+                                for (let database of resourceGroup.databases) {
+                                    if (database.name.endsWith("seqdef")) {
+                                        options.push([database.description, database.name])
+                                    }
+                                }
+                            }
+                            return options
+                        }]]></postprocess_expression>
+                    </options>
+                </param>
+            </when>
+            <when value="institutpasteur">
+                <param name="bigsdb" label="PubMLST Database" type="select" help="Select from a list of sequence definition databases hosted by Institut Pasteur." optional="false">
+                    <options from_url="https://bigsdb.pasteur.fr/api/db" request_method="GET">
+                        <postprocess_expression type="ecma5.1"><![CDATA[${
+                            let options = []
+                            for (let resourceGroup of inputs) {
+                                for (let database of resourceGroup.databases) {
+                                    if (database.name.endsWith("seqdef")) {
+                                        options.push([database.description, database.name])
+                                    }
+                                }
+                            }
+                            return options
+                        }]]></postprocess_expression>
+                    </options>
+                </param>
+            </when>
+        </conditional>
+        <conditional name="mode">
+            <param name="operation" label="Operation" type="select" help="The operational mode of the tool.">
+                <option value="st" selected="true">Perform sequence typing</option>
+                <option value="info">Retrieve available schemes for database</option>
+            </param>
+            <when value="st">
+                <param name="fasta" argument="fastas" label="FASTA file" type="data" format="fasta" multiple="true" help="The FASTA(s) file to perform sequence typing on."/>
+                <param name="scheme" argument="--scheme-name" label="BIGSdb SeqDef scheme name" type="text" optional="false"  value="MLST" help="The name of the scheme to be used. Defaults to MLST. See help for more information."/>
+            </when>
+            <when value="info">
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="mlst_profiles_output" label="${tool.name} on ${on_string}" format="csv">
+            <filter>mode['operation'] == 'st'</filter>
+        </data>
+        <data name="info_schemes_out" label="${tool.name} supported Schemes" format="csv">
+            <filter>mode['operation'] == 'info'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="origin_select" value="institutpasteur"/>
+            <param name="bigsdb" value="pubmlst_bordetella_seqdef" />
+            <param name="operation" value="st" />
+            <param name="fasta" value="tohama_I_bpertussis_minimized_features.fasta" />
+            <param name="scheme" value="MLST" />
+            <output name="mlst_profiles_output" file="tohama_I_bpertussis_minimized_features_typed.csv" ftype="csv" />
+        </test>
+        <test expect_num_outputs="1">
+            <param name="origin_select" value="institutpasteur"/>
+            <param name="bigsdb" value="pubmlst_bordetella_seqdef" />
+            <param name="operation" value="st" />
+            <param name="fasta" value="B3921_bpertussis_minimized_features.fasta" />
+            <param name="scheme" value="MLST" />
+            <output name="mlst_profiles_output" file="B3921_bpertussis_minimized_features_typed.csv" ftype="csv" />
+        </test>
+        <test expect_num_outputs="1">
+            <param name="origin_select" value="institutpasteur"/>
+            <param name="bigsdb" value="pubmlst_bordetella_seqdef" />
+            <param name="operation" value="st" />
+            <param name="fasta" value="B3913_bpertussis_minimized_features.fasta" />
+            <param name="scheme" value="MLST" />
+            <output name="mlst_profiles_output" file="B3913_bpertussis_minimized_features_typed.csv" ftype="csv" />
+        </test>
+        <test expect_num_outputs="1">
+            <param name="origin_select" value="institutpasteur"/>
+            <param name="bigsdb" value="pubmlst_bordetella_seqdef" />
+            <param name="operation" value="st" />
+            <param name="fasta" value="tohama_I_bpertussis_minimized_features.fasta,B3913_bpertussis_minimized_features.fasta,B3921_bpertussis_minimized_features.fasta" />
+            <param name="scheme" value="MLST" />
+            <output name="mlst_profiles_output">
+                <assert_contents>
+                    <has_n_lines n="4"/>
+                    <has_text text="id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="origin_select" value="institutpasteur"/>
+            <param name="bigsdb" value="pubmlst_bordetella_seqdef" />
+            <param name="operation" value="info" />
+            <output name="info_schemes_out">
+                <assert_contents>
+                    <has_text text="Name,ID"/>
+                    <has_text text="MLST,3"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+What is autoBIGS
+================
+autoBIGS is a tool to help automatically performes multi-locus sequence typing (MLST) on given data by performing calls to necessary web BIGS database web APIs.
+
+Support for typing schemes other than MLST
+==========================================
+AutoBIGS is capable of performing typing for any available schemes given a database. To retrieve a list of available schemes in the form of a spreadsheet (CSV), under operation, select "retrieve available schemes for database" and run the tool.
+
+Performing Sequence Typing
+==========================
+Simply select "Perform Sequence Typing" for the "Operation" and select your FASTA files. Then, enter your SeqDef Database name (see "Obtaining Database Name and Schema ID" above) and schema ID.
+
+Special behaviour for FASTAs with multiple sequences
+====================================================
+AutoBIGS will treat multiple sequences in the same FASTA file as part of the same sample. This will result in a fasta with multiple sequences within the same row with the final sequence type being retrieved from the resulting alleles of the individual sequences within the FASTA. This is helpful if the input FASTA was obtained from a targetted form of sequencing for the specific needed regions (e.g., Sanger sequencing of 7 house keeping genes), rather than a whole genome sequence. If your data is a whole genome sequence (WGS) of the entire genome or similar, each WGS should have it's own FASTA.
+
+In the case all sequences in a given FASTA have the same ID, autoBIGS will not repeat these names in the output row.
+
+More Information
+================
+For more information on the tool being wrapped itself, please see the `autoBIGS.cli Github repository`_. Issues, bugs, and feature requests for the tool itself should be submitted to the `autoBIGS.cli issues`_. If the issue/bug/feature request is solely pertanent to the Galaxy wrapper, please check out the `autoBIGS.galaxy issues`_ tracker on GitHub.
+
+
+.. _autoBIGS.cli GitHub repository: https://github.com/Syph-and-VPD-Lab/autoBIGS.cli
+.. _autoBIGS.cli issues: https://github.com/Syph-and-VPD-Lab/autoBIGS.cli/issues
+.. _autoBIGS.galaxy: https://github.com/Syph-and-VPD-Lab/autoBIGS.galaxy
+.. _autoBIGS.galaxy issues: https://github.com/Syph-and-VPD-Lab/autoBIGS.galaxy/issues
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@book{Deng2025RealYHD,
+title = {Syph-and-VPD-Lab/autoBIGS.cli},
+url = {https://github.com/Syph-and-VPD-Lab/autoBIGS.cli},
+author = {Deng, Harrison},
+date = {2025-01-24},
+year = {2025},
+month = {1},
+day = {24},
+}
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,7 @@
+<macros>
+  <xml name="bio_tools">
+    <xrefs>
+        <xref type="bio.tools">AutoBIGS.CLI</xref>
+    </xrefs>
+  </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3913_bpertussis_minimized_features.fasta	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,133 @@
+>lcl|CP011447.1_gene_2762 [gene=adk] [locus_tag=B3913_2762] [location=2916440..2917096] [gbkey=Gene]
+ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
+ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
+GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
+CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
+ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
+CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
+AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
+TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
+GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
+CGCCTGTCGCAGGCTCTGCAGAGCTAA
+>lcl|CP011447.1_gene_253 [gene=fumC] [locus_tag=B3913_0253] [location=257438..258829] [gbkey=Gene]
+ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
+AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
+GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
+GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
+AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
+GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
+GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
+AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
+TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
+GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
+GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
+CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
+AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
+GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
+CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
+GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
+TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
+CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
+GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
+TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
+>lcl|CP011447.1_gene_2963 [gene=glyA] [locus_tag=B3913_2963] [location=complement(3129365..3130612)] [gbkey=Gene]
+ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
+TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
+GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
+GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
+AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
+GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
+AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
+AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
+GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
+GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
+CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
+CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
+TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
+TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
+CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
+GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
+CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
+CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
+>lcl|CP011447.1_gene_2473 [gene=icd] [locus_tag=B3913_2473] [location=complement(2605674..2606930)] [gbkey=Gene]
+ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
+ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
+GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
+TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
+TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
+CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
+TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
+GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
+GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
+CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
+ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
+GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
+GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
+TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
+CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
+GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
+CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
+TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
+>lcl|CP011447.1_gene_2403 [gene=pepA] [locus_tag=B3913_2403] [location=2531836..2533335] [gbkey=Gene]
+ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
+TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
+GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
+CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
+AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
+CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
+GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
+GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
+CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
+GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
+CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
+CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
+TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
+CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
+GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
+AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
+CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
+CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
+CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
+CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
+GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
+ATGCAGTACCTGCTGGACCAGGCAGGCTGA
+>lcl|CP011447.1_gene_3165 [gene=pgm] [locus_tag=B3913_3165] [location=3355021..3356403] [gbkey=Gene]
+GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
+ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
+GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
+GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
+GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
+GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
+CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
+TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
+ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
+CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
+TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
+CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
+GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
+GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
+CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
+TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
+TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
+CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
+AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
+AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
+>lcl|CP011447.1_gene_2110 [gene=tyrB] [locus_tag=B3913_2110] [location=2214524..2215726] [gbkey=Gene]
+ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
+ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
+GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
+ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
+CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
+GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
+GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
+CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
+GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
+CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
+TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
+GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
+GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
+CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
+GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
+TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
+CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
+GGTGCTGAAGTAG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3913_bpertussis_minimized_features_typed.csv	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,2 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|CP011447.1_gene_2762', 'lcl|CP011447.1_gene_253', 'lcl|CP011447.1_gene_2963', 'lcl|CP011447.1_gene_2473', 'lcl|CP011447.1_gene_2403', 'lcl|CP011447.1_gene_3165', 'lcl|CP011447.1_gene_2110')",2,ST-2 complex,1,1,1,1,1,1,3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3921_bpertussis_minimized_features.fasta	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,133 @@
+>lcl|CP011448.1_cds_ALH77808.1_2459 [gene=adk] [locus_tag=B3921_2764] [protein=adenylate kinase] [protein_id=ALH77808.1] [location=2918521..2919177] [gbkey=CDS]
+ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
+ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
+GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
+CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
+ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
+CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
+AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
+TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
+GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
+CGCCTGTCGCAGGCTCTGCAGAGCTAA
+>lcl|CP011448.1_cds_ALH75563.1_214 [gene=fumC] [locus_tag=B3921_0253] [protein=fumarate hydratase] [protein_id=ALH75563.1] [location=257428..258819] [gbkey=CDS]
+ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
+AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
+GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
+GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
+AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
+GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
+GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
+AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
+TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
+GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
+GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
+CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
+AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
+GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
+CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
+GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
+TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
+CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
+GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
+TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
+>lcl|CP011448.1_cds_ALH77981.1_2632 [gene=glyA] [locus_tag=B3921_2965] [protein=serine hydroxymethyltransferase] [protein_id=ALH77981.1] [location=complement(3131372..3132619)] [gbkey=CDS]
+ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
+TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
+GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
+GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
+AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
+GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
+AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
+AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
+GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
+GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
+CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
+CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
+TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
+TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
+CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
+GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
+CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
+CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
+>lcl|CP011448.1_cds_ALH77547.1_2198 [gene=icd] [locus_tag=B3921_2474] [protein=isocitrate dehydrogenase] [protein_id=ALH77547.1] [location=complement(2606706..2607962)] [gbkey=CDS]
+ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
+ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
+GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
+TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
+TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
+CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
+TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
+GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
+GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
+CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
+ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
+GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
+GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
+TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
+CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
+GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
+CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
+TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
+>lcl|CP011448.1_cds_ALH77480.1_2131 [gene=pepA] [locus_tag=B3921_2404] [protein=leucyl aminopeptidase] [protein_id=ALH77480.1] [location=2532868..2534367] [gbkey=CDS]
+ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
+TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
+GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
+CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
+AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
+CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
+GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
+GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
+CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
+GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
+CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
+CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
+TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
+CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
+GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
+AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
+CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
+CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
+CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
+CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
+GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
+ATGCAGTACCTGCTGGACCAGGCAGGCTGA
+>lcl|CP011448.1_cds_ALH78163.1_2814 [gene=pgm] [locus_tag=B3921_3166] [protein=phosphoglucomutase] [protein_id=ALH78163.1] [location=3355979..3357361] [gbkey=CDS]
+GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
+ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
+GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
+GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
+GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
+GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
+CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
+TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
+ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
+CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
+TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
+CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
+GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
+GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
+CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
+TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
+TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
+CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
+AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
+AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
+>lcl|CP011448.1_cds_ALH77215.1_1866 [gene=tyrB] [locus_tag=B3921_2112] [protein=aromatic amino acid aminotransferase] [protein_id=ALH77215.1] [location=2216606..2217808] [gbkey=CDS]
+ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
+ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
+GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
+ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
+CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
+GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
+GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
+CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
+GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
+CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
+TCGCCGAGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
+GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
+GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
+CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
+GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
+TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
+CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
+GGTGCTGAAGTAG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B3921_bpertussis_minimized_features_typed.csv	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,2 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|CP011448.1_cds_ALH77808.1_2459', 'lcl|CP011448.1_cds_ALH75563.1_214', 'lcl|CP011448.1_cds_ALH77981.1_2632', 'lcl|CP011448.1_cds_ALH77547.1_2198', 'lcl|CP011448.1_cds_ALH77480.1_2131', 'lcl|CP011448.1_cds_ALH78163.1_2814', 'lcl|CP011448.1_cds_ALH77215.1_1866')",2,ST-2 complex,1,1,1,1,1,1,3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_mlsts.csv	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,4 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|CP011447.1_gene_2762', 'lcl|CP011447.1_gene_253', 'lcl|CP011447.1_gene_2963', 'lcl|CP011447.1_gene_2473', 'lcl|CP011447.1_gene_2403', 'lcl|CP011447.1_gene_3165', 'lcl|CP011447.1_gene_2110')",2,ST-2 complex,1,1,1,1,1,1,3
+"('lcl|BX640419.1_cds_CAE43044.1_2724', 'lcl|BX640411.1_cds_CAE40628.1_248', 'lcl|BX640420.1_cds_CAE43224.1_2904', 'lcl|BX640418.1_cds_CAE42760.1_2440', 'lcl|BX640418.1_cds_CAE42692.1_2372', 'lcl|BX640420.1_cds_CAE43408.1_3088', 'lcl|BX640416.1_cds_CAE42081.1_1761')",1,ST-2 complex,1,1,1,1,1,1,1
+"('lcl|CP011448.1_cds_ALH77808.1_2459', 'lcl|CP011448.1_cds_ALH75563.1_214', 'lcl|CP011448.1_cds_ALH77981.1_2632', 'lcl|CP011448.1_cds_ALH77547.1_2198', 'lcl|CP011448.1_cds_ALH77480.1_2131', 'lcl|CP011448.1_cds_ALH78163.1_2814', 'lcl|CP011448.1_cds_ALH77215.1_1866')",2,ST-2 complex,1,1,1,1,1,1,3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tohama_I_bpertussis_minimized_features.fasta	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,133 @@
+>lcl|BX640419.1_cds_CAE43044.1_2724 [gene=adK] [locus_tag=BP2769] [db_xref=GOA:P0DKX8,InterPro:IPR000850,InterPro:IPR006259,InterPro:IPR007862,InterPro:IPR027417] [protein=adenylate kinase] [protein_id=CAE43044.1] [location=164032..164688] [gbkey=CDS]
+ATGCGTCTCATTCTGCTCGGACCGCCCGGAGCCGGCAAAGGCACCCAAGCCGCCTTTCTCACCCAACACT
+ACGGCATCCCGCAGATATCCACCGGTGACATGCTGCGCGCCGCCGTCAAGGCCGGCACGCCGCTGGGCCT
+GGAAGCCAAGAAGGTCATGGACGCGGGCGGCCTGGTCTCGGACGACCTGATCATCGGCCTGGTGCGCGAT
+CGCCTGACCCAGCCCGATTGCGCCAACGGCTACCTGTTCGACGGTTTCCCGCGCACCATCCCGCAGGCCG
+ACGCGCTCAAGAGCGCCGGCATCGCGCTGGATTACGTGGTCGAGATCGAAGTGCCGGAAAGCGACATCAT
+CGAACGCATGAGCGAACGCCGCGTGCACCCGGCCAGCGGCCGCAGCTACCACGTACGCTTCAATCCGCCC
+AAGGCCGAAGGCGTGGACGACGTCACGGGCGAACCGCTGGTGCAGCGCGACGACGACCGCGAGGAAACCG
+TGCGCCATCGTCTCAACGTCTACCAGAACCAGACCCGCCCGCTGGTCGACTACTACTCGTCCTGGGCCCA
+GTCCGATGCCGCCGCGGCGCCCAAGTACCGCAAGATCTCCGGCGTCGGCTCGGTCGACGAAATCAAGAGC
+CGCCTGTCGCAGGCTCTGCAGAGCTAA
+>lcl|BX640411.1_cds_CAE40628.1_248 [gene=fumC] [locus_tag=BP0248] [db_xref=GOA:Q7W0A2,InterPro:IPR000362,InterPro:IPR005677,InterPro:IPR008948,InterPro:IPR018951,InterPro:IPR020557,InterPro:IPR022761,InterPro:IPR024083] [protein=fumarate hydratase class II] [protein_id=CAE40628.1] [location=256543..257934] [gbkey=CDS]
+ATGAAAACCCGCACCGAAAAAGACACTTTCGGCCCGATCGAGGTGCCCGAGCAGCACCTGTGGGGCGCGC
+AGACCCAGCGCTCGCTGCATTTCTTCGCGATCTCGACCGAGAAGATGCCGGTGCCGCTGGTCGCCGCCAT
+GGCACGCCTGAAGCGCGCCGCCGCCAAGGTCAACGCCGAGCTGGGCGAGCTGGATCCGCAGGTCGCAGAC
+GCCATCATGCGGGCCGCCGATGAGGTGATCGCCGGCAAGTGGCCCGACGAGTTTCCGCTGTCGGTCTGGC
+AGACCGGCTCGGGCACGCAGAGCAACATGAACATGAACGAGGTGCTGGCCAACCGCGCCTCCGAGCTGCT
+GGGCGGCGAGCGCGGCGAAGGCCGCAAGGTGCACCCCAACGACCACGTGAACCGGGGCCAGTCGTCCAAC
+GATACCTTTCCGACCGCCATGCACGTGGCCGCCGCGGTCGAGGTCGAGCACCGCGTGCTGCCCGCCCTGA
+AGGCGTTGCGCGGCACGCTGGCCGCCAAGAGCGCGGCGTTCTACGACATCGTCAAGATCGGTCGCACCCA
+TTTGCAGGACGCCACCCCGTTGACGCTGGGCCAGGAGATCTCCGGCTACGTGGCGCAGCTGGACCTGGCC
+GAGCAGCAGATCCGCGCGACGCTGGCCGGCCTGCACCAGCTGGCCATCGGCGGCACGGCGGTGGGCACCG
+GCCTGAACGCGCATCCGCAGTTCAGCGCCAAGGTATCGGCCGAACTGGCCCATGACACGGGCAGCGCGTT
+CGTGTCGGCGCCCAACAAGTTCCAGGCGCTGGCTTCGCACGAGGCGCTGCTGTTCGCGCACGGCGCCTTG
+AAGACGCTGGCCGCCGGCCTGATGAAGATCGCCAACGATGTGCGCTGGCTGGCCAGCGGCCCGCGCTCGG
+GGCTGGGCGAAATCAGCATTCCCGAGAACGAGCCGGGCAGCTCCATCATGCCGGGCAAGGTCAACCCGAC
+CCAGTGCGAAGCCGTCACGATGCTGGCCGCGCAGGTCATGGGCAACGACGTGGCCATCAATGTCGGCGGG
+GCCAGCGGCAACTTCGAGCTGAACGTCTTCAAGCCGCTGGTGATCCACAATTTCCTGCAGTCGGTGCGCC
+TGCTGGCCGACGGCATGGTCAGCTTCGACAAGCACTGCGCGGCCGGCATCGAGCCCAACCGCGAGCGCAT
+CACCGAGCTGGTCGAGCGTTCGCTGATGCTGGTGACTGCGCTCAACCCGCACATCGGCTACGACAAGGCC
+GCGCAGATCGCCAAGAAGGCGCACAAGGAAAACCTGTCGCTGAAAGAGGCGGCGCTGGCGCTGGGGCACC
+TGACCGAGGCGCAGTTCGCCGAGTGGGTGGTGCCGGGCGACATGACCAACGCGCGCCGCTAG
+>lcl|BX640420.1_cds_CAE43224.1_2904 [gene=glyA] [locus_tag=BP2952] [db_xref=GOA:Q7VUW7,InterPro:IPR001085,InterPro:IPR015421,InterPro:IPR015422,InterPro:IPR015424,InterPro:IPR019798] [protein=serine hydroxymethyltransferase] [protein_id=CAE43224.1] [location=complement(8611..9858)] [gbkey=CDS]
+ATGTTCAACCGCAACCTGACCCTCGACCAGGTGGATCCCGACGTCTGGGCCGCCATCCAGAAAGAAGACG
+TACGCCAGGAACAGCACATCGAGCTGATCGCGTCCGAGAACTACGCCAGCCCCGCCGTGATGCAGGCCCA
+GGGCACGCAACTGACCAACAAGTATGCGGAAGGCTACCCGGGCAAGCGCTACTACGGCGGTTGCGAGTAC
+GTCGACGTGGTCGAGCAGCTGGCCATCGACCGCCTGAAGCAGATTTTCGGCGCCGAGGCCGCCAACGTGC
+AGCCGAACTCCGGCTCGCAGGCCAACCAGGGCGTGTACATGGCGGTGCTCAAGCCGGGCGATACCGTGCT
+GGGCATGAGCCTGGCCGAAGGCGGTCACCTGACGCACGGCGCGTCGGTCAACGCCTCGGGCAAGCTGTAC
+AACTTCGTGCCCTACGGCCTGGACGCCGACGAGGTGCTGGACTACGCCCAGGTCGAGCGGCTGACCAAGG
+AACACAAGCCCAAGCTGATCGTGGCCGGCGCCTCCGCGTACGCGCTGCACATCGACTTCGAGCGCATGGC
+GCGCATCGCCCACGACAACGGCGCGCTGTTCATGGTGGACATCGCCCACTATGCCGGCCTGGTGGCCGGC
+GGCGCCTATCCCAACCCGGTGCCGCACGCCGATTTCGTCACCTCCACCACGCACAAGTCGCTGCGCGGCC
+CGCGCGGCGGCGTCATCATGATGAAGGCCGAGTTCGAGAAGGCCGTCAATTCGGCCATCTTCCCGGGCAT
+CCAGGGCGGTCCGCTGATGCACGTCATCGCGGCCAAGGCCGTGGCCTTCAAGGAAGCGCTGTCGCCCGAG
+TTCCAGGATTACGCCCAGCAGGTCGTCAAGAACGCCAAGGTGCTGGCCGATACGCTGGTCAAGCGCGGCC
+TGCGCATCGTGTCGGGCAGGACCGAAAGCCACGTCATGCTGGTGGACCTGCGTCCCAAGGGCATTACCGG
+CAAGGAAGCGGAAGCGGTGCTGGGCCAGGCCCACATCACGGTCAACAAGAACGCCATTCCCAACGACCCG
+GAAAAGCCCTTCGTGACCAGCGGCATCCGCCTGGGCACTCCGGCCATGACCACCCGCGGCTTCAAGGAGG
+CCGAGGCCGAGCTGACCGCCAACCTGATCGCCGACGTGCTGGACAATCCGCGCGACGAGGCGAACATCGC
+CGCGGTGCGCGCGCGGGTCAATGAACTGACCGCCCGCCTGCCCGTCTACGGCAACTGA
+>lcl|BX640418.1_cds_CAE42760.1_2440 [gene=icd] [locus_tag=BP2488] [db_xref=GOA:Q7VVZ2,InterPro:IPR001804,InterPro:IPR004439,InterPro:IPR019818,InterPro:IPR024084,UniProtKB/TrEMBL:Q7VVZ2] [protein=isocitrate dehydrogenase [NADP]] [protein_id=CAE42760.1] [location=complement(204636..205892)] [gbkey=CDS]
+ATGTCCTATCAACATATCAAGGTTCCCACTGGGGGCCAAAAAATCACGGTCAACGCCGATTACTCGCTGA
+ATGTGCCCGATCAGGTCATCATTCCGGTCATCGAGGGTGACGGTACGGGCGCCGACATCACGCCGGTGAT
+GATTAAGGTCGTCGACGCGGCCGTGCAGAAGGCCTATGCGGGCAAGCGCAAGATCCACTGGATGGAAGTC
+TACGCCGGCGAGAAGGCCACCAAGGTCTACGGCCCGGACGTCTGGCTGCCCGAGGAAACCCTCGACGCCG
+TCAAGGACTACGTGGTGTCGATCAAGGGTCCGCTGACCACGCCGGTCGGCGGCGGCATCCGTTCGCTGAA
+CGTGGCGCTGCGCCAGCAGCTGGACCTGTATGTCTGCCTGCGCCCGGTGCGCTACTTCAAGGGCGTGCCC
+TCGCCGGTGCGCGAGCCCGAGAAGACCGACATGGTCATCTTCCGCGAGAACTCGGAAGACATCTACGCGG
+GCATCGAGTACATGGCCGAGTCCGAGCAGGCCAAGGACCTGATCCAGTACCTGCAGACCAAGCTGGGCGT
+GACCAAGATCCGCTTCCCGAACACCTCGTCGATCGGCATCAAGCCGGTTTCGCGCGAAGGCACCGAGCGC
+CTGGTGCGCAAGGCGCTGCAGTACGCCATCGACAATGACCGCGCCTCGGTGACCCTGGTCCACAAGGGCA
+ACATCATGAAGTTCACGGAAGGCGGCTTCCGCGACTGGGGCTACGCCCTGGCCCAGAACGAGTTCGGCGC
+GCAGCCGATCGACGGCGGCCCGTGGTGCAAGTTCAAGAATCCCAAGACGGGTCGCGAGATCATCGTCAAG
+GATTCGATCGCCGACGCCTTCCTGCAGCAGATCCTGCTGCGTCCGGCCGAATACGACGTGATCGCCACGC
+TGAACCTGAACGGCGACTACATCTCCGACGCGCTGGCCGCGCAAGTGGGCGGCATCGGCATTGCCCCGGG
+CGCCAACCTGTCGGATTCCGTGGCCATGTTCGAAGCCACCCACGGCACCGCGCCGAAGTACGCGGGCAAG
+GACTACGTGAACCCCGGTTCCGAAATCCTGTCGGCCGAAATGATGCTGCGCCACATGGGCTGGACCGAGG
+CCGCCGACCTGATCATCGCCAGCATGGAGAAATCCATCCTGTCCAAGAAGGTCACCTATGACTTCGCCCG
+TCTGCTCGAAGGCGCCACCCAGGTGTCGTGCTCGGGCTTCGGTCAGGTCATGATCGACAATATGTAA
+>lcl|BX640418.1_cds_CAE42692.1_2372 [gene=pepA] [locus_tag=BP2421] [db_xref=GOA:Q7VW48,InterPro:IPR000819,InterPro:IPR008283,InterPro:IPR011356,InterPro:IPR023042] [protein=cytosol aminopeptidase] [protein_id=CAE42692.1] [location=131847..133346] [gbkey=CDS]
+ATGGAATTTAGCACACAGACCACTGCCTCCCTGCATCAGATCAAGACTGCGGCCCTGGCCGTCGGCGTCT
+TCGCCGACGGCGTGCTCAGCGCCGCCGCCGAAGTCATCGACCGCGCCAGCCACGGTGCCGTGGCCGCCGT
+GGTGAAAAGCGAGTTCCGCGGCCGCACCGGCAGCACGCTGGTGCTGCGCAGCCTGGCCGGCGTCAGCGCC
+CAGCGCGTGGTGCTGGTGGGCCTGGGCAAGCAGGCCGAATACAACGCCCGCGCGCACGCCAGCGCCGAAC
+AGGCGTTCGCCGCGGCGTGCGTCGCGGCCCAGGTGGGCGAAGGCGTGTCGACCCTGGCCGGCGTGGCCAT
+CGAGGGCGTGCCGGTGCGCGCCCGCGCGCGCAGCGCCGCCATCGCCGCGGGCGCGGCGGCCTACCATTAC
+GATGCGACGTTCGGCAAGGCCAATCGCGACGCCCGCCCCAGGTTGAAGAAAATCGTCCAGGTGGTCGACC
+GCGCGGCCTCCGCGCAGGCGCAGCTGGGCCTGCGCGAAGGCGCGGCCATCGCCCACGGCATGGAATTGAC
+CCGCACGCTGGGCAACCTGCCCGGCAACGTGTGCACGCCGGCCTATCTCGGCAATACCGCCAAGAAACTG
+GCGCGCGAATTCAAGAGCCTCAAGGTCGAGGTGCTCGAACGCAAGCAGGTCGAGGCGCTGGGCATGGGCT
+CGTTCCTCTCGGTCGCGCGCGGCTCGGAAGAACCGCTGCGCTTCATCGTGCTGCGCCATGCCGGCAAGCC
+CGCCAAGAAGGACAAGGCCGGCCCGGTCGTCCTGGTGGGCAAGGGCATCACCTTCGATGCTGGCGGCATC
+TCGCTCAAGCCGGCCGCCACGATGGACGAAATGAAGTACGACATGTGCGGCGCGGCCAGCGTGCTGGGCA
+CGTTCCGCGCCCTGGCCGAGCTGGAGCTGCCGCTGGATGTGGTGGGCCTGATCGCGGCGTGCGAGAACCT
+GCCCAGCGGCAAGGCCAACAAGCCCGGCGACGTGGTCACCAGCATGTCGGGCCAGACCATCGAGATCCTC
+AACACCGACGCCGAAGGCCGCCTGGTGCTGTGCGATGCCCTGACCTACGCCGAGCGCTTCAAGCCCGCGG
+CCGTGATCGACATCGCCACGTTGACCGGCGCCTGCGTGGTAGCCCTGGGCAACGTCAATAGCGGCCTGTT
+CTCCAAGGACGACGCGCTGGCCGACGCGCTGCTGGCCGCCAGCCGCCAGTCGCTCGACCCGGCCTGGCGC
+CTGCCGCTGGACGATGCCTACCAGGACCAGCTCAAGTCCAACTTCGCCGACATCGCCAACATCGGCGGCC
+CCCCGGCCGGCGCGGTCACGGCGGCCTGCTTCCTGTCGCGCTTCACCAAGGCTTATCCGTGGGCGCACCT
+GGACATCGCCGGCACGGCCTGGCGCGGCGGCAAGGACAAGGGCGCCACCGGCCGGCCGGTGCCGCTGCTG
+ATGCAGTACCTGCTGGACCAGGCAGGCTGA
+>lcl|BX640420.1_cds_CAE43408.1_3088 [gene=pgm] [locus_tag=BP3141] [db_xref=GOA:Q7VUF5,InterPro:IPR005841,InterPro:IPR005843,InterPro:IPR005844,InterPro:IPR005845,InterPro:IPR005846,InterPro:IPR016055,InterPro:IPR016066,UniProtKB/TrEMBL:Q7VUF5] [protein=phosphoglucomutase] [protein_id=CAE43408.1] [location=217601..218983] [gbkey=CDS]
+GTGGCGCACCCCTTTCCCGCATCGGTCTACAAGGCGTACGACATCCGTGGCTCGGTTCCCGACCAGCTCG
+ACCCGGTATTCGCCCGGGCGCTGGGCCGCGCCCTGGCCGCCAGCGCCCGCGCGCAGGGCATCGGCGCCCT
+GGTGGTCGGCCGCGACGGCCGCCTGAGCAGCCCCGACCTGGCCGGCGCGCTGCAGGAAGGCATCATGGAA
+GGCGGCGTGGACACCCTGGACATCGGCCAGGTGCCCACGCCGCTGGTCTATTTCGCGGCGCACATCCAGG
+GCACGGGCTCGGGCGTGGCGGTCACCGGCAGCCACAACCCGCCGCAGTACAACGGCTTCAAGATGATGAT
+GGGCGGCCAGGCCCTGTACGGCCCGGCCGTGCAGGCGCTGCGCCCGGCCATGCTGGCGCCGGCTGCGGCG
+CCGGGCACCTGGGGCGAACGCCGCCAGCTCGATGTCGTCCCCGCCTATATCGAGCGCATCGTGTCCGACG
+TGAAGCTGGCGCGCCCCATGAAGATCGCCGTCGACTGCGGCAATGGCGTGGCCGGCGCCCTGGCGCCGCA
+ACTGTTCCGCGCGCTGGGTTGCGAAGTGGACGAGCTCTATTGCGAGGTCGACGGCACGTTTCCCAACCAC
+CATCCCGACCCGGCCGAACCGCGCAACCTGCAGGACCTGATCGCCCATGTCACCAGCACCGACTGCGAGC
+TGGGCCTGGCCTTCGACGGCGACGGCGACCGCCTCGGCGTGGTGACCAAGTCCGGCCAGATCATCTGGCC
+CGACCGCCAGCTGATCCTGTTCGCCCGCGACGTGCTGGCCCGCTGTCCCGGCGCGACCATCATCTATGAC
+GTCAAGTGCAGCCAGCACGTGGGCGTGGCCATCGAGCAAAGCGGCGGCGTGCCGCTGATGTGGCAGACTG
+GCCATTCGCTGGTGAAGGCCAAGCTGGCCGAGACCGGCGCGCCGCTGGCCGGCGAGATGAGCGGCCATAT
+CTTCTTCAAGGAGCGCTGGTACGGCTTCGACGACGGCCTGTACACCGGCGCCCGCCTGCTGGAAATCGTC
+TCCCGCGAAACCGATGCGTCGCGCCCGCTGGAGGCCCTGCCGCAGGCGCTGTCGACCCCCGAGCTCAAGC
+TGGAGATGGCCGAGGGCGAGCCGCATGCGCTGATCGCCGCCCTGCAGCAGCAGGGCGAGTTCGCCAGCGC
+CAGCCGGCTGGTTACGATAGACGGCGTGCGCGCGGAATACCCGGACGGCTTCGGGCTGGCGCGCGCCTCC
+AATACCACCCCCGTCGTCGTGCTGCGCTTCGAAGCGGAGACCGAGCCGGGCCTGGCCCGCATCCAGCAGG
+AATTCCGCCAGCAGCTGCTGCGGCTGGCTCCGCAAGCCAAACTGCCCTTCTGA
+>lcl|BX640416.1_cds_CAE42081.1_1761 [gene=tyrB] [locus_tag=BP1795] [db_xref=GOA:Q7VXH5,InterPro:IPR000796,InterPro:IPR004838,InterPro:IPR004839,InterPro:IPR015421,InterPro:IPR015424,UniProtKB/TrEMBL:Q7VXH5] [protein=aromatic-amino-acid aminotransferase] [protein_id=CAE42081.1] [location=complement(151299..152501)] [gbkey=CDS]
+ATGAGCACTCTTTTCGCTTCCGTCGAACTCGCGCCGCGCGACCCCATTCTTGGCCTGAACGAACAGTACA
+ACGCCGATACCCGTCCCGGCAAAGTGAACCTGGGCGTGGGCGTGTACTACGACGACGAAGGCCGCATCCC
+GCTGCTTCAGGCCGTGCGCAAGGCCGAGGTGGCCCGCATCGAAGCCGCCGCCGCCCGCGGCTATCTGCCG
+ATCGAAGGCATCGCGGGGTACAACAAGGGTGCGCAGGCGCTGCTGCTGGGCGCCGACTCGCCGCTGGCCG
+CCGAAGGCCGCGTGCTGACCGCGCAGGCCCTGGGCGGCACCGGCGCGCTGAAGATCGGCGCCGACTTCCT
+GCGCCAGCTGCTGCCGCAGTCCAAGGTCCTCATCAGCGACCCCAGCTGGGAAAACCACCGCGCCCTGTTC
+GAGCGCGCCGGCTTCCCGGTCGAGACCTACGCTTATTACGATGCCGCCACCCATGGCCTGAACTTCGAAG
+CCATGCTGGCCGCCCTGCAGGCCGCGCCCGAACAGACCATCGTGGTGCTGCACGCCTGCTGCCACAACCC
+GACCGGCGTCGATCCCACGCCGCAACAGTGGGAACAGATCGCCGCCGTGGTCAAGGCGCGCAACCTGGTG
+CCGTTCCTCGACATCGCCTACCAGGGCTTCGGCGAAGGCCTGGAGCAGGACGCCGCCGTGGTGCGCATGT
+TCGCCGCGCTCGACCTGACCATGTTCATCAGCTCGTCGTTCTCCAAGTCCTTCTCGCTGTATGGCGAGCG
+GGTCGGGGCCCTGACCGTGGTGGCCGGCAGCAAGGACGAGGCCGCCCGCGTGCTCAGCCAGCTCAAGCGC
+GTGATCCGCACCAACTACTCCAACCCGCCCACCCACGGCGGCACCGTGGTGTCCACGGTCCTGAACACAC
+CCGAGCTGTTCGCGCTCTGGGAAAATGAACTGGCCGGCATGCGCGACCGCATCCGCCTGATGCGCAAGGA
+GCTGGTCGAGAAGATCAAGACCCAGGGCGTGGCGCAGGACTTCAGCTTCGTGCTGGCGCAGCGCGGCATG
+TTCTCGTACTCGGGCCTGACCGCCGCCCAGGTCGATCGCCTGCGCGAAGAGCACGGCATCTACGCGGTCT
+CCAGCGGCCGCATCTGCGTGGCCGCGCTCAACAGCCGCAACATCGACGCGGTCGCGGCCGGCATCGCCGC
+GGTGCTGAAGTAG
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tohama_I_bpertussis_minimized_features_typed.csv	Fri Feb 28 17:59:00 2025 +0000
@@ -0,0 +1,2 @@
+id,st,clonal-complex,adk,fumC,glyA,icd,pepA,pgm,tyrB
+"('lcl|BX640419.1_cds_CAE43044.1_2724', 'lcl|BX640411.1_cds_CAE40628.1_248', 'lcl|BX640420.1_cds_CAE43224.1_2904', 'lcl|BX640418.1_cds_CAE42760.1_2440', 'lcl|BX640418.1_cds_CAE42692.1_2372', 'lcl|BX640420.1_cds_CAE43408.1_3088', 'lcl|BX640416.1_cds_CAE42081.1_1761')",1,ST-2 complex,1,1,1,1,1,1,1