changeset 10:5e57b0a0e8d0 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/uniprot_rest_interface commit daea630529977907a5fcfec24baeb920e41a4dd2
author bgruening
date Thu, 31 Jul 2025 17:11:00 +0000
parents f31d8d59ffb6
children
files test-data/test1_retrieve.fasta test-data/test2_map.tab uniprot.py uniprot.xml
diffstat 4 files changed, 46 insertions(+), 126 deletions(-) [+]
line wrap: on
line diff
--- a/test-data/test1_retrieve.fasta	Tue Aug 06 14:49:34 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
->sp|Q13685|AAMP_HUMAN Angio-associated migratory cell protein OS=Homo sapiens OX=9606 GN=AAMP PE=1 SV=2
-MESESESGAAADTPPLETLSFHGDEEIIEVVELDPGPPDPDDLAQEMEDVDFEEEEEEEG
-NEEGWVLEPQEGVVGSMEGPDDSEVTFALHSASVFCVSLDPKTNTLAVTGGEDDKAFVWR
-LSDGELLFECAGHKDSVTCAGFSHDSTLVATGDMSGLLKVWQVDTKEEVWSFEAGDLEWM
-EWHPRAPVLLAGTADGNTWMWKVPNGDCKTFQGPNCPATCGRVLPDGKRAVVGYEDGTIR
-IWDLKQGSPIHVLKGTEGHQGPLTCVAANQDGSLILTGSVDCQAKLVSATTGKVVGVFRP
-ETVASQPSLGEGEESESNSVESLGFCSVMPLAAVGYLDGTLAIYDLATQTLRHQCQHQSG
-IVQLLWEAGTAVVYTCSLDGIVRLWDARTGRLLTDYRGHTAEILDFALSKDASLVVTTSG
-DHKAKVFCVQRPDR
->tr|M5B8V9|M5B8V9_9MICO ABC transporter involved in the biosynthesis of cytochrom bd, fused permease/ATP-binding protein OS=Clavibacter michiganensis subsp. nebraskensis NCPPB 2581 OX=1097677 GN=CMN_01519 PE=4 SV=1
-MKPLDPRLLRHSASARTMLAVGAVVSVVQTAALVAFCWSLTQLVVRAIGGADQAALAPVL
-ALAVGSAVVRGAAAWLLDVTGARGAARVTAELRRRALRAIADLGPAWTAARSRGRLATIV
-GPGLDALDPYFARYVPQLILTALATPIVVAVLLLSDPLTGVTVLVTLPVIPVFMVLVGWA
-TQEVQRRQWSRLTELASSFLEVVDGLSTLLVFRRARRQTARIRRVTEEYRVETMRVLRIS
-FLSGFVLELAASLSVALVAVSVGVRLIGGQLDLEVGLFVLLLAPEAFLPIRQVGVQFHAA
-AEGVAAADDVLGILEEERAARATRPVPGPATATPPAGDALVIRDLAVARGDRAVLSGVSA
-RFPRGRVTAVTGPSGVGKSSLLGAMLGHLPAGGAAGWIDDDASSLRPPVPTEIAWAGQRP
-GLVAGTVRENVALGVADPDDALVRRALALAAADGIDPDLVLGVGGQGLSGGQAQRVAVAR
-AVHRALALDCPLVLLDEPSSALDAATEERLAAGIRALADQGRAVVVVTHRGALVRAADAE
-LRLGGASGEDDAPAAVGSSVGAGRVAPARIAPEPAWRAQVAP
->sp|S0DS17|APF8_GIBF5 Cytochrome P450 monooxygenase apf8 OS=Gibberella fujikuroi (strain CBS 195.34 / IMI 58289 / NRRL A-6831) OX=1279085 GN=apf8 PE=1 SV=1
-MSYQSILLRQVNSLCDNLEEVARDENGGLIDMAMQSDYFTFDVMSEVIFGMAYNALKDTS
-YRFVTGALGSSNIRIGTLVQSPLPAMCRIDKYLFPESIQGRNKFLGFIGSLLRDRSKASF
-AGNGNVFSFLETAKDPDGGNQLSKSEIRAECATLVAAGTDTSSSTLAATLFYLSRNSKCY
-SRVSEEVRNAFSSHQDIKIGPELNSCVYLRACIEETLRMSPPVGAALWREIGPGGMNIGP
-LTLPAGVDVGTGIYSLHHNAAYHPEPFKYLPERWLVGEGSSTSESVELARSAFAPFSRGP
-RSCVGKGFAYHELTLTIAHILHRFDFSATEEDFALRHGSEGPGGINEFLLHDHVTGARSG
-PLLQFSMRR
->tr|M5BAG7|M5BAG7_9MICO ABC transporter, fused permease/ABC transporter involved in the biosynthesis of cytochrom bd, fused permease/ATP-binding protein OS=Clavibacter michiganensis subsp. nebraskensis NCPPB 2581 OX=1097677 GN=cydC PE=4 SV=1
-MNRDGVLRLAQPPTRRTLPGLLAGLASAVGAVALLATSAWLITRASEQPPILFLGMAIVG
-VRAFALGRAAFRYLERITSHDAAFRALATLRVGVFERLLPFAPAGLRDTRRGDLLARLVG
-DVDRLQDLPLRVVQPLAVSVVVQAASVAVVGAVLPAAGIALAVVLGVALVVGIGATTALA
-GRAETRIAPLRARLQDLVLDFVGGLDVLTSFGAVDDRLAAIDRAATELRRAELRSAAAAG
-VTTGVVLAGTGAVAGWTVLQGVPGLASGTLDPAWLALAALVPLALVEQATAVPLAVQAWR
-RVRTSAERVAGVVPETVPDEIPREPDDAADAQPVTADASPAGTTLEVRDLVTRWPGADED
-ALAPVSLVVRPGETVVVRGPSGSGKSSLAAALARFLESRGAYELDGRDARSMPPSAVRRI
-VGLCEQAPHLFDASIRQNLLFARDDATDDELVAVLARVGLADWTAGRGGLDARVGDRGGL
-VSGGQAQRIALARALLADFPVLVLDEPTADVDAERARAVLRDVLTAARDRGPGVLLLTHT
-DVPHDLVDRTVELRVAGDRVRTE
->tr|A0A077Z587|A0A077Z587_TRITR Kelch 3 and Kelch 4 and Cytochrom B561 domain con taining protein OS=Trichuris trichiura OX=36087 GN=TTRE_0000309301 PE=4 SV=1
-MGSQQAADETQKVVERIILNINVRKDKRSFGLGIKIKKGNVFVSSIRPGSIAEDHFKLYD
-VIKDVNGSRIDSRELCRDLIRTHKVLTVTVERELSKNIEQPGQGDRKSSTECPYLETAQP
-FSEMEKNQWSKLPADVREILKKQFATASQYGLQAPARTEQPTQTEHRKVSVLENIVRFEI
-TSDVPRDKSLRKPSDGQQLYKIVASYQCISLLADQMIIYLWLRIGWLILTFNLFVTQAVS
-LHWKRVAEYGKNPRPQARKHAAFGYDMLRHYVVLFGGQGERDENYNDTWIFDVLAGRWYA
-VHRNVAPPAMHGAAFGLNDGKFYLVGGCDQTQCFDDVWVFLTSTFEWHKLAPKGELRPTG
-RLGAIGGFYATGSHIIYGLGTTINDQFLEDIFFFDIPMQRWYKIIERLFVYSPFTPHPRR
-HMSSLMVSPSEVLLFGGCSKHGQCPTGDAWLFNVQSHVWQSLPFCPSPRMEASAVTLLSS
-DDVEPKPAAVLIYGGRRYTSQHLLGSPMLEPDEVVIYDLVGKSWSIRSSKYEDSSGLPEQ
-RSAASTASTLTEVYMFGGEAYDGRLLDDFWMLAGDWRESATNQKCQQVNFNLLALHGLLM
-SASFALILPAGALWALYKSARVTKQKKSGGWTMTHTIAQTCGMVIVAAGAVCSIQAKRDN
-GKHFGSVHGVLGIIVIALLCVQVALGFSKSLIRTEAQRRTINRVHFWLAIVLLPLAFLNI
-ILGLQLIAVPVGLLLGFFVHIFCLLAALGLILPILRFRKANRSVAFPPPNDD
->tr|Q0P8A9|Q0P8A9_CAMJE Putative formate dehydrogenase, cytochrom B subunit OS=Campylobacter jejuni subsp. jejuni serotype O:2 (strain ATCC 700819 / NCTC 11168) OX=192222 GN=fdhC PE=3 SV=1
-MRKVFVTLLLSVVSLFAYGSERMGQDTQIWDFHRITNIPNYDTFGKLWTTLQGEYIATIA
-LIAVIAVLSAFALHYMVIGPKQFSHDGKKIYAFTLFERLFHFIAAISWVILVPTGFVMMF
-GEVFGGGVFVRVCKNLHAFATILFIISIIPMFLCWIKRMLPASYDIRWMMIVGGYLSKIK
-RPVPAGKFNFGQKSWYYIAVFGGFLMIITGGFMYFLDFNSTAIQGLFGLTQIELLRISAI
-VHNFLGIVCAVFFGVHIYMAVFAIKGSIHSMISGYKEEEEVYILHSYWYKELSNKKQIEP
-SFSYDPNVKI
->tr|A0A077ZHN8|A0A077ZHN8_TRITR HAMP and MCPsignal and TarH and Cytochrom B N dom ain containing protein OS=Trichuris trichiura OX=36087 GN=TTRE_0000819801 PE=4 SV=1
-MEFRGFFPRSDRPLINMVHVSCGISILVLMVVRLLLRLKYPTPPIIPKPKPMMTGLAHLG
-HLVIYLLFIALPVIGLVMMYNRGNPWFAFGLTMPYASEANFERVDSLKSWHETLANLGYF
-VIGSALAGYFLWQADRDQRDVTAEIEIRTGLANSSDFLRSARINMIQAGAASRIAEMEAM
-KRNIAQAESEIKQSQQGYRAYQNRPVKTPADEALDTELNQRFQAYITGMQPMLKYAKNGM
-FEAIINHESEQIRPLDNAYTDILNKAVKIRSTRANQLAELAHQRTRLGGMFMIGAFVLAL
-VMTLITFMVLRRIVIRPLQHAAQRIEKIASGDLTMNDEPAGRNEIGRLSRHLQQMQHSLG
-MTVGTVRQGAEEIYRGTSEISAGNADLSSRTEEQAAAIEQTAASMEQLTATVKQNADNAH
-HASKLAQEASIKASDGGQTVSGVVKTMGAISTSSKKISEITAVINSIAFQTNILALNAAV
-EAARAGEQGRGFAVVASEVRTLASRSAQAAKEIEGLISESVRLIDLGSDEVATAGKTMST
-IVDAVASVTHIMQEIAAASDEQSRGITQVSQAISEMDKVTQQNASLVEEASAAAVSLEEQ
-AARLTEAVDVFRLHKHSVSAEPRGAGEPVSFATV
->tr|A0A077ZFY8|A0A077ZFY8_TRITR PmbA TldD and Mur ligase M and Mur ligase and Mur ligase C and Cytochrom B562 domain containing protein OS=Trichuris trichiura OX=36087 GN=TTRE_0000758701 PE=3 SV=1
-MGGLAMLARQLGHEVTGSDANVYPPMSTLLEKQGIELIQGYDASQLDPQPDLVIIGNAMT
-RGNPCVEAVLEKNIPYMSGPQWLHDFVLRDRWVLAVAGTHGKTTTAGMATWILEQCGYKP
-GFVIGGVPGNFEVSARLGESNFFVIEADEYDCAFFDKRSKFVHYCPRTLILNNLEFDHAD
-IFDDLKAIQKQFHHLVRIVPGQGRIIWPENDINLKQTMAMGCWSEQELVGEQGHWQAKKL
-TTDASEWEVLLDGEKVGEVKWSLVGEHNMHNGLMAIAAARHVGVAPADAANALGSFINAR
-RRLELRGEANGVTVYDDFAHHPTAILATLAALRGKVGGTARIIAVLEPRSNTMKMGICKD
-DLAPSLGRADEVFLLQPAHIPWQVAEVAEACVQPAHWSGDVDTLADMVVKTAQPGDHILV
-MSNGGFGGIHQKLLDDFRETLYIMALAMKVISQVEAQRKILEEAVSTALELASGKSDGAE
-VAVSKTTGISVSTRYGEVENVEFNSDGALGITVYHQNRKGSASSTDLSPQAIARTVQAAL
-DIARYTSPDPCAGVADKELLAFDAPDLDLFHPAEVSPDEAIELAARAEQAALQADKRITN
-TEGGSFNSHYGVKVFGNSHGMLQGYCSTRHSLSSCVIAEENGDMERDYAYTIGRAMSDLQ
-TPEWVGADCARRTLSRLSPRKLSTMKAPVIFANEVATGLFGHLVGAIAGGSVYRKSTFLL
-DSLGKQILPDWLTIEEHPHLLKGLASTPFDSEGVRTERRDIIKDGILTQWLLTSYSARKL
-GLKSTGHAGGIHNWRIAGQGLSFEQMLKEMGTGLVVTELMGQGVSAITGDYSRGAAGFWV
-ENGEIQYPVSEITIAVSSLVFSSASFAADLEDNMETLNDNLKVVEKADNAAQVKDALTKM
-RAAALDAQKATPPKLEGKSPDSPEMKDFRHGFDILVGQIDDALKLANEGKVKEAQAAAEQ
-LKTTRNAYHQKYR
->sp|O14639|ABLM1_HUMAN Actin-binding LIM protein 1 OS=Homo sapiens OX=9606 GN=ABLIM1 PE=1 SV=3
-MPAFLGLKCLGKLCSSEKSKVTSSERTSARGSNRKRLIVEDRRVSGTSFTAHRRATITHL
-LYLCPKDYCPRGRVCNSVDPFVAHPQDPHHPSEKPVIHCHKCGEPCKGEVLRVQTKHFHI
-KCFTCKVCGCDLAQGGFFIKNGEYLCTLDYQRMYGTRCHGCGEFVEGEVVTALGKTYHPN
-CFACTICKRPFPPGDRVTFNGRDCLCQLCAQPMSSSPKETTFSSNCAGCGRDIKNGQALL
-ALDKQWHLGCFKCKSCGKVLTGEYISKDGAPYCEKDYQGLFGVKCEACHQFITGKVLEAG
-DKHYHPSCARCSRCNQMFTEGEEMYLQGSTVWHPDCKQSTKTEEKLRPTRTSSESIYSRP
-GSSIPGSPGHTIYAKVDNEILDYKDLAAIPKVKAIYDIERPDLITYEPFYTSGYDDKQER
-QSLGESPRTLSPTPSAEGYQDVRDRMIHRSTSQGSINSPVYSRHSYTPTTSRSPQHFHRP
-GNEPSSGRNSPLPYRPDSRPLTPTYAQAPKHFHVPDQGINIYRKPPIYKQHAALAAQSKS
-SEDIIKFSKFPAAQAPDPSETPKIETDHWPGPPSFAVVGPDMKRRSSGREEDDEELLRRR
-QLQEEQLMKLNSGLGQLILKEEMEKESRERSSLLASRYDSPINSASHIPSSKTASLPGYG
-RNGLHRPVSTDFAQYNSYGDVSGGVRDYQTLPDGHMPAMRMDRGVSMPNMLEPKIFPYEM
-LMVTNRGRNKILREVDRTRLERHLAPEVFREIFGMSIQEFDRLPLWRRNDMKKKAKLF
--- a/test-data/test2_map.tab	Tue Aug 06 14:49:34 2024 +0000
+++ b/test-data/test2_map.tab	Thu Jul 31 17:11:00 2025 +0000
@@ -1,4 +1,3 @@
 From	Entry	Entry Name	Reviewed	Protein names	Gene Names	Organism	Length
 NM_001087	Q13685	AAMP_HUMAN	reviewed	Angio-associated migratory cell protein	AAMP	Homo sapiens (Human)	434
 NM_130786	P04217	A1BG_HUMAN	reviewed	Alpha-1B-glycoprotein (Alpha-1-B glycoprotein)	A1BG	Homo sapiens (Human)	495
-NM_130786	V9HWD8	V9HWD8_HUMAN	unreviewed	Epididymis secretory sperm binding protein Li 163pA	HEL-S-163pA	Homo sapiens (Human)	495
--- a/uniprot.py	Tue Aug 06 14:49:34 2024 +0000
+++ b/uniprot.py	Thu Jul 31 17:11:00 2025 +0000
@@ -292,5 +292,8 @@
             sleep(5)
 
     if not isinstance(results, str):
-        results = "\n".join(results)
+        if args.format in ["fasta", "txt"]:
+            results = "".join(results)
+        else:
+            results = "\n".join(results)
     args.out.write(f"{results}\n")
--- a/uniprot.xml	Tue Aug 06 14:49:34 2024 +0000
+++ b/uniprot.xml	Thu Jul 31 17:11:00 2025 +0000
@@ -1,4 +1,4 @@
-<tool id="uniprot" name="UniProt" version="0.6" profile="23.1">
+<tool id="uniprot" name="UniProt" version="0.7" profile="23.1">
     <description>ID mapping and retrieval</description>
     <requirements>
         <requirement type="package" version="2.25.1">requests</requirement>
@@ -826,7 +826,7 @@
                 <param name="format" type="select" label="Choose format of output file" help="">
                     <option value="fasta">fasta</option>
                     <option value="gff">gff</option>
-                    <option value="txt">Text</option>
+                    <option value="txt">embl</option>
                 </param>
             </when>
         </conditional>
@@ -842,8 +842,8 @@
                 <filter>tool['tool_choice'] == 'retrieve'</filter>
                 <filter>tool['format'] == 'gff'</filter>
             </data>
-            <data name="outfile_retrieve_txt" format="txt" from_work_dir="./output"
-                label="${tool.name} on ${on_string}: txt">
+            <data name="outfile_retrieve_txt" format="embl" from_work_dir="./output"
+                label="${tool.name} on ${on_string}: embl">
                	<filter>tool['tool_choice'] == 'retrieve'</filter>
                 <filter>tool['format'] == 'txt'</filter>
             </data>
@@ -853,18 +853,28 @@
             </data>
     </outputs>
     <tests>
+        <!-- Test FASTA output: currently 7 sequences are returned, so we test for 10 +/- 5 header lines; since it is multi-line FASTA, we assume at least 5 lines of sequence. -->
         <test expect_num_outputs="1">
             <param name="infile" value="id_uniprot.tab" ftype="tabular"/>
-            <param name="id_column" value="c1"/>
-            <param name="format" value="fasta"/>
-            <param name="tool_choice" value="retrieve"/>
-            <output name="outfile_retrieve_fasta" file="test1_retrieve.fasta" ftype="fasta" compare="sim_size" />
+            <param name="id_column" value="1"/>
+            <conditional name="tool">
+                <param name="format" value="fasta"/>
+                <param name="tool_choice" value="retrieve"/>
+            </conditional>
+            <output name="outfile_retrieve_fasta" ftype="fasta">
+                <assert_contents>
+                    <has_line_matching expression="^>.+$" n="10" delta="5"/>
+                    <has_line_matching expression="^[A-Z]+$" min="5"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="1">
             <param name="infile" value="id_uniprot.tab" ftype="tabular"/>
-            <param name="id_column" value="c1"/>
-            <param name="format" value="gff"/>
-            <param name="tool_choice" value="retrieve"/>
+            <param name="id_column" value="1"/>
+            <conditional name="tool">
+                <param name="format" value="gff"/>
+                <param name="tool_choice" value="retrieve"/>
+            </conditional>
             <output name="outfile_retrieve_gff" ftype="gff">
                 <assert_contents>
                     <has_n_lines min="80"/>
@@ -874,18 +884,30 @@
         </test>
         <test expect_num_outputs="1">
             <param name="infile" value="id_uniprot.tab" ftype="tabular"/>
-            <param name="id_column" value="c1"/>
-            <param name="tool_choice" value="map"/>
-            <param name="from" value="UniProtKB_AC-ID"/>
-            <param name="to" value="Gene_Name"/>
-            <output name="outfile_map" file="test1_map.tab" ftype="tabular" sort="true"/>
+            <param name="id_column" value="1"/>
+            <conditional name="tool">
+                <param name="format" value="txt"/>
+                <param name="tool_choice" value="retrieve"/>
+            </conditional>
+            <output name="outfile_retrieve_txt" ftype="embl"> <!-- format=txt produces outfile_retrieve_txt (embl); the gff output is filtered out for this test -->
+                <assert_contents>
+                    <has_n_lines min="80"/>
+                    <has_text text="UniProtKB"/>
+                    <has_line_matching expression="^ID .*"/>
+                    <has_line line="//"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="1">
             <param name="infile" value="id_map_refseq.txt" ftype="tabular"/>
-            <param name="id_column" value="c1"/>
-            <param name="tool_choice" value="map"/>
-            <param name="from" value="RefSeq_Nucleotide"/>
-            <param name="to" value="UniProtKB"/>
+            <param name="id_column" value="1"/>
+            <conditional name="tool">
+                <param name="tool_choice" value="map"/>
+                <conditional name="from_cond">
+                    <param name="from" value="RefSeq_Nucleotide"/>
+                    <param name="to" value="UniProtKB"/>
+                </conditional>
+            </conditional>
             <output name="outfile_map" file="test2_map.tab" ftype="tabular" sort="true"/>
         </test>
     </tests>