dbbuilder: dbbuilder.xml comparison

comparison dbbuilder.xml @ 8:3f6354b7eb94 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/dbbuilder commit 16ba4570b04301b774ee0420694f379cc640744b

author	galaxyp
date	Tue, 27 Sep 2022 13:21:28 +0000
parents	8e263cd38da1
children	0f641df3f28a

comparison

equal deleted inserted replaced

-:8e263cd38da1
+:3f6354b7eb94
-<tool id="dbbuilder" name="Protein Database Downloader" version="0.3.2">
+<tool id="dbbuilder" name="Protein Database Downloader" version="0.3.3">
 <description></description>
 <requirements>
 <requirement type="package" version="1.20.1">wget</requirement>
+<requirement type="package" version="3.8">python</requirement>
+<requirement type="package" version="2.20.1">requests</requirement>
 </requirements>
 <stdio>
 <exit_code range="1:"  level="fatal" description="Error downloading database." />
 <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." />
 </stdio>
 <!-- http://maxquant.org/contaminants.zip -->
 <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz-->
 <command>
 <![CDATA[
 #if $source.from == "uniprot"
-#set $url = 'http://www.uniprot.org/uniprot/?query=taxonomy:"' + str($source.taxon) + '"' + str($source.set) + str($source.reviewed) + '&force=yes&format=fasta' + str($source.include_isoform)
+#if $source.set:
-#set $type = "direct"
+#set $modified_set = '&' + str($source.set)
+#else
+#set $modified_set = ''
+#end if
+#if $source.taxon_id
+#set $taxon_id = $source.taxon_id
+#else
+#set $taxon_id = $source.taxon
+#end if
+#set $url = 'https://rest.uniprot.org/uniprotkb/stream?compressed=true&format=fasta&query=taxonomy_id:"' + str($taxon_id) + '"' + str($modified_set) + str($source.reviewed) + str($source.include_isoform)
+#set $type = "uniprotkb_stream"
 #elif $source.from == "cRAP"
 ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta"
 #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta"
 #set $type = "direct"
 #elif $source.from == "HMP"
 #end if
 #elif $source.from == "url"
 #set $url = $source.url
 #set $type = $source.archive_type
 #end if
-#if $type =="direct"
+#if $type =="uniprotkb_stream"
+python '$__tool_directory__/uniprotkb.py' --url '$url' -o 'tmp.gz' && gzip -dc 'tmp.gz' > '${output_database}'
+#elif $type =="direct"
 wget -nv '$url' -O '${output_database}' --no-check-certificate
 #elif $type =="zip"
 wget -nv '$url' -O tmp.zip --no-check-certificate && zcat -c tmp.zip > '${output_database}'
 #elif $type =="gzip"
 wget -nv '$url' -O tmp.gz --no-check-certificate && (if `command -v gzcat > /dev/null`;  then gzcat tmp.gz; else zcat tmp.gz ; fi) > '${output_database}'
 #end if
 ]]>
 </command>
 <inputs>
 <conditional name="source">
-<param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases">
+<param name="from" type="select" label="Download from"
+help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases">
 <option value="uniprot">UniProtKB</option>
 <option value="cRAP">cRAP (contaminants)</option>
 <option value="HMP">Human Microbiome Project body sites</option>
 <option value="HOMD">Human Oral Microbiome Database (HOMD)</option>
 <option value="url">Custom URL</option>
 <param name="taxon" type="select" format="text" help="select species for protein database">
 <label>Taxonomy</label>
 <options from_file="uniprot_taxons.loc">
 <column name="name" index="0" />
 <column name="value" index="1" />
+<filter type="add_value" name="Escherichia coli (strain K12)" value="83333" />
 </options>
 </param>
+<param name="taxon_id" type="integer" value="" min="1" optional="true" help="Specify a NCBI taxon id to override species selection"/>
 <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy">
-<option value="+">UniProtKB</option>
+<option value="">UniProtKB</option>
-<option value="+reviewed%3Ayes">UniProtKB/Swiss-Prot (reviewed only)</option>
+<option value="+reviewed%3Atrue">UniProtKB/Swiss-Prot (reviewed only)</option>
-<option value="+reviewed%3Ano">UniProtKB/TrEMBL (unreviewed only)</option>
+<option value="+reviewed%3Afalse">UniProtKB/TrEMBL (unreviewed only)</option>
 <sanitizer>
 <valid>
 <add value="%"/>
 </valid>
 </sanitizer>
 </param>
 <param name="set" type="select" label="Proteome Set">
-<option value="+">Any</option>
+<option value="">Any</option>
-<option value="+keyword%3a1185" selected="true">Reference Proteome Set</option>
+<option value="keyword%3aKW-1185" selected="true">Reference Proteome Set</option>
 <sanitizer>
 <valid>
 <add value="%"/>
 </valid>
 </sanitizer>
 </param>
-<param name="include_isoform" type="boolean" truevalue="&amp;include=yes" falsevalue="" label="Include isoform data" help="several different forms of a given protein are incorporated into database" />
+<param name="include_isoform" type="boolean" truevalue="&amp;includeIsoform=true" falsevalue=""
+label="Include isoform data" help="several different forms of a given protein are incorporated into database" />
 </when>
 <when value="cRAP" />
 <when value="HMP">
 <param name="site" type="select" label="Proteome for body site">
 <option value="Airways">HMP airways</option>
 <outputs>
 <data format="fasta" name="output_database" label="Protein Database ${source.from}" />
 </outputs>
 <tests>
 <test>
-<param name="from" value="cRAP" />
+<conditional name="source">
+<param name="from" value="cRAP" />
+</conditional>
 <output name="output_database">
 <assert_contents>
 <has_text text="KKA1_ECOLX" />
 </assert_contents>
 </output>
 </test>
+<test>
+<conditional name="source">
+<param name="from" value="uniprot" />
+<param name="taxon" value="83333"/>
+<param name="taxon_id" value="2697049"/>
+</conditional>
+<output name="output_database">
+<assert_contents>
+<has_text text="SPIKE_SARS2" />
+</assert_contents>
+</output>
+</test>
+<test>
+<conditional name="source">
+<param name="from" value="uniprot" />
+<param name="taxon_id" value="2697049"/>
+<param name="reviewed" value="+reviewed%3Atrue"/>
+<param name="set" value=""/>
+</conditional>
+<output name="output_database">
+<assert_contents>
+<has_text text=">sp|P0DTC1|R1A_SARS2" />
+<not_has_text text=">tr|A0A679G4D8|A0A679G4D8_SARS2" />
+</assert_contents>
+</output>
+</test>
+<test>
+<conditional name="source">
+<param name="from" value="uniprot" />
+<param name="taxon_id" value="2697049"/>
+<param name="reviewed" value="+reviewed%3Afalse"/>
+<param name="set" value=""/>
+</conditional>
+<output name="output_database">
+<assert_contents>
+<has_text text=">tr|A0A679G4D8|A0A679G4D8_SARS2" />
+<not_has_text text=">sp|P0DTC1|R1A_SARS2" />
+</assert_contents>
+</output>
+</test>
 <test>
 <param name="from" value="url" />
 <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" />
 <param name="archive_type" value="direct" />
 <output name="output_database">

Mercurial > repos > galaxyp > dbbuilder

comparison dbbuilder.xml @ 8:3f6354b7eb94 draft