Mercurial > repos > iuc > psauron
changeset 0:afe3cab23563 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/psauron commit 1aa7cbac40ac6cd332040b91edffa451cb1bbd21
| author | iuc |
|---|---|
| date | Fri, 20 Feb 2026 07:43:34 +0000 |
| parents | |
| children | |
| files | macros.xml psauron.xml test-data/psauron_output.csv test-data/seq_test_CDS.fa test-data/seq_test_protein.faa |
| diffstat | 5 files changed, 224 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- macros.xml: tokens and macros shared with the psauron Galaxy tool wrapper (psauron.xml). -->
<macros>
    <!-- Version tokens; @TOOL_VERSION@ is also used to pin the psauron package below. -->
    <token name="@TOOL_VERSION@">1.1.0</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">25.0</token>

    <!-- Package requirements: psauron at the tool version, plus a pinned setuptools. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">psauron</requirement>
            <requirement type="package" version="69.5.1">setuptools</requirement>
        </requirements>
    </xml>

    <!-- Command that reports the installed psauron version. -->
    <xml name="version_command">
        <version_command>psauron --version</version_command>
    </xml>

    <!-- DOI citation attached to the tool. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1093/nargab/lqae189</citation>
        </citations>
    </xml>
</macros>
<!-- psauron.xml: Galaxy tool wrapper for psauron. Version tokens, requirements, the version command and the citation come from macros.xml. -->
<tool id="psauron" name="Psauron" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Machine learning model for rapid assessment of protein coding gene annotation</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!-- Use the macro from macros.xml rather than repeating the command inline. -->
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
        ## The -c flag is always passed; it is not exposed as a user parameter.
        psauron
            -i '$i'
            -c
            ## Every parameter below is declared optional. Emit the flag only when
            ## a value is present, so a cleared form field falls back to psauron's
            ## own default instead of producing an empty argument.
            #if str($minimum_length) != ''
                --minimum-length '$minimum_length'
            #end if
            ## No exclusion filter is passed unless the user typed some text.
            #if str($exclude).strip() != ''
                --exclude '$exclude'
            #end if
            #if str($inframe) != ''
                --inframe '$inframe'
            #end if
            #if str($outframe) != ''
                --outframe '$outframe'
            #end if
            $s
            $p
            -o '$psauron_score_file'
    ]]></command>
    <inputs>
        <param argument="-i" type="data" format="fasta" label="Input fasta" help="FASTA with spliced CDS sequence or protein sequence. A spliced CDS fasta can be created from a GTF/GFF and a reference FASTA by using gffread."/>
        <param argument="--minimum-length" type="integer" min="0" value="5" optional="true" label="Exclude all proteins shorter than m amino acids"/>
        <!-- Default is empty: the previous default, the literal text None, was handed to psauron as a filter string. -->
        <param argument="--exclude" type="text" value="" optional="true" label="Exclude any CDS where FASTA description contains given text (case invariant), e.g. hypothetical"/>
        <param argument="--inframe" type="float" min="0" max="1" value="0.5" optional="true" label="Probability threshold used to determine final psauron score, in-frame, higher number decreases sensitivity and increases specificity, default=0.5"/>
        <param argument="--outframe" type="float" min="0" max="1" value="0.5" optional="true" label="Probability threshold used to determine final psauron score, out-of-frame, higher number increases sensitivity and decreases specificity, default=0.5"/>
        <!-- The c option is hard-coded in the command above and is not offered as a parameter. -->
        <param argument="-s" type="boolean" label="Score only the in-frame CDS, which may lower accuracy of the model" truevalue="-s" falsevalue="" checked="false" />
        <param argument="-p" type="boolean" label="Set to Yes if your FASTA contains amino acid protein sequence, which may lower accuracy of the model" truevalue="-p" falsevalue="" checked="false" />
        <!-- The v option is not exposed. -->
    </inputs>
    <outputs>
        <data name="psauron_score_file" format="csv" label="${tool.name} on ${on_string}: csv with scores for all reading frames"></data>
    </outputs>
    <tests>
        <!-- TEST 1 : nucleotide coding sequences (CDS), all parameters left at their defaults -->
        <test expect_num_outputs="1">
            <param name="i" value="seq_test_CDS.fa" ftype="fasta"/>
            <!-- With defaults, the numeric flags are rendered and no exclusion filter is passed. -->
            <assert_command>
                <has_text text="--minimum-length"/>
                <not_has_text text="--exclude"/>
            </assert_command>
            <output name="psauron_score_file" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="psauron score" />
                </assert_contents>
            </output>
        </test>
        <!-- TEST 2 : protein (amino acid) sequence -->
        <test expect_num_outputs="1">
            <param name="i" value="seq_test_protein.faa" ftype="fasta"/>
            <param name="p" value="true"/>
            <output name="psauron_score_file" ftype="csv">
                <assert_contents>
                    <has_text_matching expression="psauron score" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**PSAURON**

PSAURON is a machine learning model for rapid assessment of protein coding gene annotation.

PSAURON (Protein Sequence Assessment Using a Reference ORF Network), is a software tool developed to help assess the quality of protein-coding gene annotations. Utilizing a machine learning model trained on a diverse dataset from over 1000 plant and animal genomes, PSAURON assigns a score to coding DNA or protein sequence that reflects the likelihood that the sequence is a genuine protein-coding region.

PSAURON scores can be used for genome-wide protein annotation assessment as well as the rapid identification of potentially spurious annotated proteins. Validation against established benchmarks demonstrates PSAURON’s effectiveness and correlation with recognized measures of protein quality, highlighting its potential use as a widely applicable method to evaluate precision in gene annotation.

]]></help>
    <!-- Use the macro from macros.xml rather than repeating the DOI inline. -->
    <expand macro="citations"/>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/psauron_output.csv Fri Feb 20 07:43:34 2026 +0000 @@ -0,0 +1,13 @@ +/home/scorrear/miniforge3/envs/mulled-v1-050aca60ede0ee0da4e1db4d21695dd771ed2581dbbc5af0ca23095e87e93def/bin/psauron -i /tmp/tmpzefkmwgx/files/a/5/5/dataset_a5554c67-b122-4232-b54a-e7c14eaa04b1.dat -c --minimum-length 5 --exclude --inframe 0.5 --outframe 0.5 -p -o psauron_score.csv +psauron score: 80.0 +description,psauron_is_protein,in-frame_score +NP_414542.1,False,0.45405 +NP_414543.1,True,0.99368 +NP_414544.1,True,0.99918 +NP_414545.1,True,0.99832 +NP_414546.1,False,0.3545 +NP_414547.1,True,0.99996 +NP_414548.1,True,0.9967 +NP_414549.1,True,0.99972 +NP_414550.1,True,0.99454 +NP_414551.1,True,0.99996
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seq_test_CDS.fa Fri Feb 20 07:43:34 2026 +0000 @@ -0,0 +1,65 @@ +>lcl|NC_000913.3_cds_NP_414542.1_1 [gene=thrL] [locus_tag=b0001] [db_xref=UniProtKB/Swiss-Prot:P0AD86] [protein=thr operon leader peptide] [protein_id=NP_414542.1] [location=190..255] [gbkey=CDS] +ATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGA +>lcl|NC_000913.3_cds_NP_414543.1_2 [gene=thrA] [locus_tag=b0002] [db_xref=UniProtKB/Swiss-Prot:P00561] [protein=fused aspartate kinase/homoserine dehydrogenase 1] [protein_id=NP_414543.1] [location=337..2799] [gbkey=CDS] +ATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAA +TGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAA +CCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCC +GCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGG +CATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTA +TGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTAC +CTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGAT +GGCAGGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGC +TGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACGGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTG +CCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCC +CCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATACCGGAAATCCTCAAGCACCAGGTACGC +TCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGCATTTCCAATCTGAATAACATGGCAATGTTCAGCGTT +TCTGGTCCGGGGATGAAAGGGATGGTCGGCATGGCGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGT +GCTGATTACGCAATCATCTTCCGAATACAGCATCAGTTTCTGCGTTCCACAAAGCGACTGTGTGCGAGCTGAACGGGCAA +TGCAGGAAGAGTTCTACCTGGAACTGAAAGAAGGCTTACTGGAGCCGCTGGCAGTGACGGAACGGCTGGCCATTATCTCG +GTGGTAGGTGATGGTATGCGCACCTTGCGTGGGATCTCGGCGAAATTCTTTGCCGCACTGGCCCGCGCCAATATCAACAT 
+TGTCGCCATTGCTCAGGGATCTTCTGAACGCTCAATCTCTGTCGTGGTAAATAACGATGATGCGACCACTGGCGTGCGCG +TTACTCATCAGATGCTGTTCAATACCGATCAGGTTATCGAAGTGTTTGTGATTGGCGTCGGTGGCGTTGGCGGTGCGCTG +CTGGAGCAACTGAAGCGTCAGCAAAGCTGGCTGAAGAATAAACATATCGACTTACGTGTCTGCGGTGTTGCCAACTCGAA +GGCTCTGCTCACCAATGTACATGGCCTTAATCTGGAAAACTGGCAGGAAGAACTGGCGCAAGCCAAAGAGCCGTTTAATC +TCGGGCGCTTAATTCGCCTCGTGAAAGAATATCATCTGCTGAACCCGGTCATTGTTGACTGCACTTCCAGCCAGGCAGTG +GCGGATCAATATGCCGACTTCCTGCGCGAAGGTTTCCACGTTGTCACGCCGAACAAAAAGGCCAACACCTCGTCGATGGA +TTACTACCATCAGTTGCGTTATGCGGCGGAAAAATCGCGGCGTAAATTCCTCTATGACACCAACGTTGGGGCTGGATTAC +CGGTTATTGAGAACCTGCAAAATCTGCTCAATGCAGGTGATGAATTGATGAAGTTCTCCGGCATTCTTTCTGGTTCGCTT +TCTTATATCTTCGGCAAGTTAGACGAAGGCATGAGTTTCTCCGAGGCGACCACGCTGGCGCGGGAAATGGGTTATACCGA +ACCGGACCCGCGAGATGATCTTTCTGGTATGGATGTGGCGCGTAAACTATTGATTCTCGCTCGTGAAACGGGACGTGAAC +TGGAGCTGGCGGATATTGAAATTGAACCTGTGCTGCCCGCAGAGTTTAACGCCGAGGGTGATGTTGCCGCTTTTATGGCG +AATCTGTCACAACTCGACGATCTCTTTGCCGCGCGCGTGGCGAAGGCCCGTGATGAAGGAAAAGTTTTGCGCTATGTTGG +CAATATTGATGAAGATGGCGTCTGCCGCGTGAAGATTGCCGAAGTGGATGGTAATGATCCGCTGTTCAAAGTGAAAAATG +GCGAAAACGCCCTGGCCTTCTATAGCCACTATTATCAGCCGCTGCCGTTGGTACTGCGCGGATATGGTGCGGGCAATGAC +GTTACAGCTGCCGGTGTCTTTGCTGATCTGCTACGTACCCTCTCATGGAAGTTAGGAGTCTGA +>lcl|NC_000913.3_cds_NP_414544.1_3 [gene=thrB] [locus_tag=b0003] [db_xref=UniProtKB/Swiss-Prot:P00547] [protein=homoserine kinase] [protein_id=NP_414544.1] [location=2801..3733] [gbkey=CDS] +ATGGTTAAAGTTTATGCCCCGGCTTCCAGTGCCAATATGAGCGTCGGGTTTGATGTGCTCGGGGCGGCGGTGACACCTGT +TGATGGTGCATTGCTCGGAGATGTAGTCACGGTTGAGGCGGCAGAGACATTCAGTCTCAACAACCTCGGACGCTTTGCCG +ATAAGCTGCCGTCAGAACCACGGGAAAATATCGTTTATCAGTGCTGGGAGCGTTTTTGCCAGGAACTGGGTAAGCAAATT +CCAGTGGCGATGACCCTGGAAAAGAATATGCCGATCGGTTCGGGCTTAGGCTCCAGTGCCTGTTCGGTGGTCGCGGCGCT +GATGGCGATGAATGAACACTGCGGCAAGCCGCTTAATGACACTCGTTTGCTGGCTTTGATGGGCGAGCTGGAAGGCCGTA +TCTCCGGCAGCATTCATTACGACAACGTGGCACCGTGTTTTCTCGGTGGTATGCAGTTGATGATCGAAGAAAACGACATC +ATCAGCCAGCAAGTGCCAGGGTTTGATGAGTGGCTGTGGGTGCTGGCGTATCCGGGGATTAAAGTCTCGACGGCAGAAGC 
+CAGGGCTATTTTACCGGCGCAGTATCGCCGCCAGGATTGCATTGCGCACGGGCGACATCTGGCAGGCTTCATTCACGCCT +GCTATTCCCGTCAGCCTGAGCTTGCCGCGAAGCTGATGAAAGATGTTATCGCTGAACCCTACCGTGAACGGTTACTGCCA +GGCTTCCGGCAGGCGCGGCAGGCGGTCGCGGAAATCGGCGCGGTAGCGAGCGGTATCTCCGGCTCCGGCCCGACCTTGTT +CGCTCTGTGTGACAAGCCGGAAACCGCCCAGCGCGTTGCCGACTGGTTGGGTAAGAACTACCTGCAAAATCAGGAAGGTT +TTGTTCATATTTGCCGGCTGGATACGGCGGGCGCACGAGTACTGGAAAACTAA +>lcl|NC_000913.3_cds_NP_414545.1_4 [gene=thrC] [locus_tag=b0004] [db_xref=UniProtKB/Swiss-Prot:P00934] [protein=threonine synthase] [protein_id=NP_414545.1] [location=3734..5020] [gbkey=CDS] +ATGAAACTCTACAATCTGAAAGATCACAACGAGCAGGTCAGCTTTGCGCAAGCCGTAACCCAGGGGTTGGGCAAAAATCA +GGGGCTGTTTTTTCCGCACGACCTGCCGGAATTCAGCCTGACTGAAATTGATGAGATGCTGAAGCTGGATTTTGTCACCC +GCAGTGCGAAGATCCTCTCGGCGTTTATTGGTGATGAAATCCCACAGGAAATCCTGGAAGAGCGCGTGCGCGCGGCGTTT +GCCTTCCCGGCTCCGGTCGCCAATGTTGAAAGCGATGTCGGTTGTCTGGAATTGTTCCACGGGCCAACGCTGGCATTTAA +AGATTTCGGCGGTCGCTTTATGGCACAAATGCTGACCCATATTGCGGGTGATAAGCCAGTGACCATTCTGACCGCGACCT +CCGGTGATACCGGAGCGGCAGTGGCTCATGCTTTCTACGGTTTACCGAATGTGAAAGTGGTTATCCTCTATCCACGAGGC +AAAATCAGTCCACTGCAAGAAAAACTGTTCTGTACATTGGGCGGCAATATCGAAACTGTTGCCATCGACGGCGATTTCGA +TGCCTGTCAGGCGCTGGTGAAGCAGGCGTTTGATGATGAAGAACTGAAAGTGGCGCTAGGGTTAAACTCGGCTAACTCGA +TTAACATCAGCCGTTTGCTGGCGCAGATTTGCTACTACTTTGAAGCTGTTGCGCAGCTGCCGCAGGAGACGCGCAACCAG +CTGGTTGTCTCGGTGCCAAGCGGAAACTTCGGCGATTTGACGGCGGGTCTGCTGGCGAAGTCACTCGGTCTGCCGGTGAA +ACGTTTTATTGCTGCGACCAACGTGAACGATACCGTGCCACGTTTCCTGCACGACGGTCAGTGGTCACCCAAAGCGACTC +AGGCGACGTTATCCAACGCGATGGACGTGAGTCAGCCGAACAACTGGCCGCGTGTGGAAGAGTTGTTCCGCCGCAAAATC +TGGCAACTGAAAGAGCTGGGTTATGCAGCCGTGGATGATGAAACCACGCAACAGACAATGCGTGAGTTAAAAGAACTGGG +CTACACTTCGGAGCCGCACGCTGCCGTAGCTTATCGTGCGCTGCGTGATCAGTTGAATCCAGGCGAATATGGCTTGTTCC +TCGGCACCGCGCATCCGGCGAAATTTAAAGAGAGCGTGGAAGCGATTCTCGGTGAAACGTTGGATCTGCCAAAAGAGCTG +GCAGAACGTGCTGATTTACCCTTGCTTTCACATAATCTGCCCGCCGATTTTGCTGCGTTGCGTAAATTGATGATGAATCA +TCAGTAA \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seq_test_protein.faa Fri Feb 20 07:43:34 2026 +0000 @@ -0,0 +1,54 @@ +>NP_414542.1 thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655] +MKRISTTITTTITITTGNGAG +>NP_414543.1 fused aspartate kinase/homoserine dehydrogenase 1 [Escherichia coli str. K-12 substr. MG1655] +MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAA +AQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHY +LESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQV +PDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSV +SGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIIS +VVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGAL +LEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV +ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSL +SYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMA +NLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGND +VTAAGVFADLLRTLSWKLGV +>NP_414544.1 homoserine kinase [Escherichia coli str. K-12 substr. MG1655] +MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWERFCQELGKQI +PVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENDI +ISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGFIHACYSRQPELAAKLMKDVIAEPYRERLLP +GFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVADWLGKNYLQNQEGFVHICRLDTAGARVLEN +>NP_414545.1 threonine synthase [Escherichia coli str. K-12 substr. 
MG1655] +MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAF +AFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRG +KISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQ +LVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKI +WQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKEL +AERADLPLLSHNLPADFAALRKLMMNHQ +>NP_414546.1 DUF2502 domain-containing protein YaaX [Escherichia coli str. K-12 substr. MG1655] +MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHLHGPPPPPRHH +KKAPHDHHGGHGPGKHHR +>NP_414547.1 DNA binding and peroxide stress response protein YaaA [Escherichia coli str. K-12 substr. MG1655] +MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPDFTPANARQAI +LAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARGKDLYQFWGDIITNKLNEALA +AQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKKARGLMSRFIIENRLTKPEQLTGFNSEGYFF +DEDSSSNGELVFKRYEQR +>NP_414548.1 putative transporter YaaJ [Escherichia coli str. K-12 substr. MG1655] +MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAARVGSGNLAGV +ALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMRWMGVLFAVFLLIAYGIIFSG +VQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPLMAIIWVLTSLVICVMNIGQLPHVIWSIFES +AFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPNAAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILL +AGNGTTYMPLEGIQLIQKAMRVLMGSWGAEFVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGG +TLLSLPLMWQLADIIMACMAITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE +>NP_414549.1 transaldolase B [Escherichia coli str. K-12 substr. 
MG1655] +MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQIVDATDKLAVN +IGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGIRAAEQLEKEGINCNLTLLFS +FAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIYQYYKEHGYETVVMGASFRNIGEILELAGCD +RLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESEFLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL +>NP_414550.1 molybdopterin adenylyltransferase [Escherichia coli str. K-12 substr. MG1655] +MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLVLTTGGTGPAR +RDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIKETLEGVKDAEGNVVVHGIFA +SVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE +>NP_414551.1 acetate/succinate:H(+) symporter [Escherichia coli str. K-12 substr. MG1655] +MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFTSYGSFWLTLV +AILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAIGNIAGNAAIIHFAGWIGLIC +GASAIYLAMGEVLNEQFGRTVLPIGESH \ No newline at end of file
