Mercurial > repos > galaxyp > dbbuilder
comparison dbbuilder.xml @ 8:3f6354b7eb94 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/dbbuilder commit 16ba4570b04301b774ee0420694f379cc640744b
| author | galaxyp |
|---|---|
| date | Tue, 27 Sep 2022 13:21:28 +0000 |
| parents | 8e263cd38da1 |
| children | 0f641df3f28a |
comparison
equal
deleted
inserted
replaced
| 7:8e263cd38da1 | 8:3f6354b7eb94 |
|---|---|
| 1 <tool id="dbbuilder" name="Protein Database Downloader" version="0.3.2"> | 1 <tool id="dbbuilder" name="Protein Database Downloader" version="0.3.3"> |
| 2 <description></description> | 2 <description></description> |
| 3 <requirements> | 3 <requirements> |
| 4 <requirement type="package" version="1.20.1">wget</requirement> | 4 <requirement type="package" version="1.20.1">wget</requirement> |
| | 5 <requirement type="package" version="3.8">python</requirement> |
| | 6 <requirement type="package" version="2.20.1">requests</requirement> |
| 5 </requirements> | 7 </requirements> |
| 6 <stdio> | 8 <stdio> |
| 7 <exit_code range="1:" level="fatal" description="Error downloading database." /> | 9 <exit_code range="1:" level="fatal" description="Error downloading database." /> |
| 8 <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." /> | 10 <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." /> |
| 9 </stdio> | 11 </stdio> |
| 12 <!-- http://maxquant.org/contaminants.zip --> | 14 <!-- http://maxquant.org/contaminants.zip --> |
| 13 <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz--> | 15 <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz--> |
| 14 <command> | 16 <command> |
| 15 <![CDATA[ | 17 <![CDATA[ |
| 16 #if $source.from == "uniprot" | 18 #if $source.from == "uniprot" |
| 17 #set $url = 'http://www.uniprot.org/uniprot/?query=taxonomy:"' + str($source.taxon) + '"' + str($source.set) + str($source.reviewed) + '&force=yes&format=fasta' + str($source.include_isoform) | 19 #if $source.set: |
| 18 #set $type = "direct" | 20 #set $modified_set = '&' + str($source.set) |
| | 21 #else |
| | 22 #set $modified_set = '' |
| | 23 #end if |
| | 24 #if $source.taxon_id |
| | 25 #set $taxon_id = $source.taxon_id |
| | 26 #else |
| | 27 #set $taxon_id = $source.taxon |
| | 28 #end if |
| | 29 #set $url = 'https://rest.uniprot.org/uniprotkb/stream?compressed=true&format=fasta&query=taxonomy_id:"' + str($taxon_id) + '"' + str($modified_set) + str($source.reviewed) + str($source.include_isoform) |
| | 30 #set $type = "uniprotkb_stream" |
| 19 #elif $source.from == "cRAP" | 31 #elif $source.from == "cRAP" |
| 20 ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta" | 32 ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta" |
| 21 #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" | 33 #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" |
| 22 #set $type = "direct" | 34 #set $type = "direct" |
| 23 #elif $source.from == "HMP" | 35 #elif $source.from == "HMP" |
| 32 #end if | 44 #end if |
| 33 #elif $source.from == "url" | 45 #elif $source.from == "url" |
| 34 #set $url = $source.url | 46 #set $url = $source.url |
| 35 #set $type = $source.archive_type | 47 #set $type = $source.archive_type |
| 36 #end if | 48 #end if |
| 37 #if $type =="direct" | 49 #if $type =="uniprotkb_stream" |
| | 50 python '$__tool_directory__/uniprotkb.py' --url '$url' -o 'tmp.gz' && gzip -dc 'tmp.gz' > '${output_database}' |
| | 51 #elif $type =="direct" |
| 38 wget -nv '$url' -O '${output_database}' --no-check-certificate | 52 wget -nv '$url' -O '${output_database}' --no-check-certificate |
| 39 #elif $type =="zip" | 53 #elif $type =="zip" |
| 40 wget -nv '$url' -O tmp.zip --no-check-certificate && zcat -c tmp.zip > '${output_database}' | 54 wget -nv '$url' -O tmp.zip --no-check-certificate && zcat -c tmp.zip > '${output_database}' |
| 41 #elif $type =="gzip" | 55 #elif $type =="gzip" |
| 42 wget -nv '$url' -O tmp.gz --no-check-certificate && (if `command -v gzcat > /dev/null`; then gzcat tmp.gz; else zcat tmp.gz ; fi) > '${output_database}' | 56 wget -nv '$url' -O tmp.gz --no-check-certificate && (if `command -v gzcat > /dev/null`; then gzcat tmp.gz; else zcat tmp.gz ; fi) > '${output_database}' |
| 49 #end if | 63 #end if |
| 50 ]]> | 64 ]]> |
| 51 </command> | 65 </command> |
| 52 <inputs> | 66 <inputs> |
| 53 <conditional name="source"> | 67 <conditional name="source"> |
| 54 <param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases"> | 68 <param name="from" type="select" label="Download from" |
| | 69 help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases"> |
| 55 <option value="uniprot">UniProtKB</option> | 70 <option value="uniprot">UniProtKB</option> |
| 56 <option value="cRAP">cRAP (contaminants)</option> | 71 <option value="cRAP">cRAP (contaminants)</option> |
| 57 <option value="HMP">Human Microbiome Project body sites</option> | 72 <option value="HMP">Human Microbiome Project body sites</option> |
| 58 <option value="HOMD">Human Oral Microbiome Database (HOMD)</option> | 73 <option value="HOMD">Human Oral Microbiome Database (HOMD)</option> |
| 59 <option value="url">Custom URL</option> | 74 <option value="url">Custom URL</option> |
| 62 <param name="taxon" type="select" format="text" help="select species for protein database"> | 77 <param name="taxon" type="select" format="text" help="select species for protein database"> |
| 63 <label>Taxonomy</label> | 78 <label>Taxonomy</label> |
| 64 <options from_file="uniprot_taxons.loc"> | 79 <options from_file="uniprot_taxons.loc"> |
| 65 <column name="name" index="0" /> | 80 <column name="name" index="0" /> |
| 66 <column name="value" index="1" /> | 81 <column name="value" index="1" /> |
| | 82 <filter type="add_value" name="Escherichia coli (strain K12)" value="83333" /> |
| 67 </options> | 83 </options> |
| 68 </param> | 84 </param> |
| | 85 <param name="taxon_id" type="integer" value="" min="1" optional="true" help="Specify a NCBI taxon id to override species selection"/> |
| 69 <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy"> | 86 <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy"> |
| 70 <option value="+">UniProtKB</option> | 87 <option value="">UniProtKB</option> |
| 71 <option value="+reviewed%3Ayes">UniProtKB/Swiss-Prot (reviewed only)</option> | 88 <option value="+reviewed%3Atrue">UniProtKB/Swiss-Prot (reviewed only)</option> |
| 72 <option value="+reviewed%3Ano">UniProtKB/TrEMBL (unreviewed only)</option> | 89 <option value="+reviewed%3Afalse">UniProtKB/TrEMBL (unreviewed only)</option> |
| 73 <sanitizer> | 90 <sanitizer> |
| 74 <valid> | 91 <valid> |
| 75 <add value="%"/> | 92 <add value="%"/> |
| 76 </valid> | 93 </valid> |
| 77 </sanitizer> | 94 </sanitizer> |
| 78 </param> | 95 </param> |
| 79 <param name="set" type="select" label="Proteome Set"> | 96 <param name="set" type="select" label="Proteome Set"> |
| 80 <option value="+">Any</option> | 97 <option value="">Any</option> |
| 81 <option value="+keyword%3a1185" selected="true">Reference Proteome Set</option> | 98 <option value="keyword%3aKW-1185" selected="true">Reference Proteome Set</option> |
| 82 <sanitizer> | 99 <sanitizer> |
| 83 <valid> | 100 <valid> |
| 84 <add value="%"/> | 101 <add value="%"/> |
| 85 </valid> | 102 </valid> |
| 86 </sanitizer> | 103 </sanitizer> |
| 87 </param> | 104 </param> |
| 88 <param name="include_isoform" type="boolean" truevalue="&include=yes" falsevalue="" label="Include isoform data" help="several different forms of a given protein are incorporated into database" /> | 105 <param name="include_isoform" type="boolean" truevalue="&includeIsoform=true" falsevalue="" |
| | 106 label="Include isoform data" help="several different forms of a given protein are incorporated into database" /> |
| 89 </when> | 107 </when> |
| 90 <when value="cRAP" /> | 108 <when value="cRAP" /> |
| 91 <when value="HMP"> | 109 <when value="HMP"> |
| 92 <param name="site" type="select" label="Proteome for body site"> | 110 <param name="site" type="select" label="Proteome for body site"> |
| 93 <option value="Airways">HMP airways</option> | 111 <option value="Airways">HMP airways</option> |
| 127 <outputs> | 145 <outputs> |
| 128 <data format="fasta" name="output_database" label="Protein Database ${source.from}" /> | 146 <data format="fasta" name="output_database" label="Protein Database ${source.from}" /> |
| 129 </outputs> | 147 </outputs> |
| 130 <tests> | 148 <tests> |
| 131 <test> | 149 <test> |
| 132 <param name="from" value="cRAP" /> | 150 <conditional name="source"> |
| | 151 <param name="from" value="cRAP" /> |
| | 152 </conditional> |
| 133 <output name="output_database"> | 153 <output name="output_database"> |
| 134 <assert_contents> | 154 <assert_contents> |
| 135 <has_text text="KKA1_ECOLX" /> | 155 <has_text text="KKA1_ECOLX" /> |
| 136 </assert_contents> | 156 </assert_contents> |
| 137 </output> | 157 </output> |
| 138 </test> | 158 </test> |
| | 159 <test> |
| | 160 <conditional name="source"> |
| | 161 <param name="from" value="uniprot" /> |
| | 162 <param name="taxon" value="83333"/> |
| | 163 <param name="taxon_id" value="2697049"/> |
| | 164 </conditional> |
| | 165 <output name="output_database"> |
| | 166 <assert_contents> |
| | 167 <has_text text="SPIKE_SARS2" /> |
| | 168 </assert_contents> |
| | 169 </output> |
| | 170 </test> |
| | 171 <test> |
| | 172 <conditional name="source"> |
| | 173 <param name="from" value="uniprot" /> |
| | 174 <param name="taxon_id" value="2697049"/> |
| | 175 <param name="reviewed" value="+reviewed%3Atrue"/> |
| | 176 <param name="set" value=""/> |
| | 177 </conditional> |
| | 178 <output name="output_database"> |
| | 179 <assert_contents> |
| | 180 <has_text text=">sp\|P0DTC1\|R1A_SARS2" /> |
| | 181 <not_has_text text=">tr\|A0A679G4D8\|A0A679G4D8_SARS2" /> |
| | 182 </assert_contents> |
| | 183 </output> |
| | 184 </test> |
| | 185 <test> |
| | 186 <conditional name="source"> |
| | 187 <param name="from" value="uniprot" /> |
| | 188 <param name="taxon_id" value="2697049"/> |
| | 189 <param name="reviewed" value="+reviewed%3Afalse"/> |
| | 190 <param name="set" value=""/> |
| | 191 </conditional> |
| | 192 <output name="output_database"> |
| | 193 <assert_contents> |
| | 194 <has_text text=">tr\|A0A679G4D8\|A0A679G4D8_SARS2" /> |
| | 195 <not_has_text text=">sp\|P0DTC1\|R1A_SARS2" /> |
| | 196 </assert_contents> |
| | 197 </output> |
| | 198 </test> |
| | 199 |
| 139 <test> | 200 <test> |
| 140 <param name="from" value="url" /> | 201 <param name="from" value="url" /> |
| 141 <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" /> | 202 <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" /> |
| 142 <param name="archive_type" value="direct" /> | 203 <param name="archive_type" value="direct" /> |
| 143 <output name="output_database"> | 204 <output name="output_database"> |
