comparison interproscan.xml @ 0:bfeae84e23ee draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/interproscan commit 2f5d27a375fcc2e8d77914b3d9e402a9e2df2d97"
author iuc
date Mon, 15 Nov 2021 17:20:51 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bfeae84e23ee
1 <tool id="interproscan" name="InterProScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
2 <description>functional annotation</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <xrefs>
7 <xref type="bio.tools">interproscan_4</xref>
8 </xrefs>
9 <requirements>
10 <requirement type="package" version="@TOOL_VERSION@">interproscan</requirement>
11 </requirements>
12 <version_command>interproscan.sh --version</version_command>
13 <command><![CDATA[
14 ## Write a copy of interproscan.properties into the .interproscan-5 directory of the home directory, with data.directory replaced by the path of the database selected from the data table
15 mkdir -p \$HOME/.interproscan-5
16 &&
17 sed 's|^\(data.directory=\).*$|\1${database.fields.path}|' \$(dirname \$(readlink -f \$(command -v interproscan.sh)))/interproscan.properties > \$HOME/.interproscan-5/interproscan.properties
18 &&
19
20 ## Run InterProScan on the input with the selected sequence type, output formats and applications
21 interproscan.sh
22
23 ## -dp disables the precalculated match lookup service, so all calculations are run locally
24 -dp
25 --input '$input'
26 --seqtype $seqtype
27 -f ${','.join($oformat)}
28
29 #if $licensed.use == 'true' and $licensed.applications_licensed:
30 --applications ${','.join($applications)},${','.join($licensed.applications_licensed)}
31 #else:
32 --applications ${','.join($applications)}
33 #end if
34 --tempdir \$TEMP
35
36 $pathways
37 $goterms
38 $iprlookup
39
40 --cpu \${GALAXY_SLOTS:-4}
41
42 --output-file-base 'output'
43 ]]></command>
44 <inputs>
45 <param argument="--input" type="data" format="fasta" label="Protein or nucleotide FASTA file" help="The content must match the sequence type selected below."/>
46
47 <param argument="--seqtype" type="select" label="Type of the input sequences" help="">
48 <option value="p" selected="true">Protein</option>
49 <option value="n">DNA / RNA</option>
50 </param>
51
52 <param name="database" label="InterProScan database" type="select">
53 <options from_data_table="interproscan">
54 <column name="value" index="0" />
55 <column name="name" index="1" />
56 <column name="path" index="3" />
57 <filter type="sort_by" column="0" />
58 <filter type="static_value" column="2" value="@TOOL_VERSION@" />
59 </options>
60 </param>
61
62 <param name="applications" type="select" multiple="True" label="Applications to run" help="Select your program">
63 <option value="TIGRFAM" selected="true">TIGRFAM: protein families based on hidden Markov models (HMMs)</option>
64 <option value="SFLD" selected="true">SFLD: a database of protein families based on hidden Markov models (HMMs)</option>
65 <option value="SUPERFAMILY" selected="true">SUPERFAMILY: database of structural and functional annotation for all proteins and genomes</option>
66 <option value="PANTHER" selected="true">PANTHER: Protein ANalysis THrough Evolutionary Relationships</option>
67 <option value="Gene3D" selected="true">Gene3d: Structural assignment for whole genes and genomes using the CATH domain structure database</option>
68 <option value="Hamap" selected="true">HAMAP: High-quality Automated Annotation of Microbial Proteomes</option>
69 <option value="PrositeProfiles" selected="true">PROSITE Profiles: protein domains, families and functional sites as well as associated profiles to identify them</option>
70 <option value="Coils" selected="true">Coils: Prediction of Coiled Coil Regions in Proteins</option>
71 <option value="SMART" selected="true">SMART: identification and analysis of domain architectures based on Hidden Markov Models or HMMs</option>
72 <option value="CDD" selected="true">CDD: protein domains and families based on well-annotated multiple sequence alignment models</option>
73 <option value="PRINTS" selected="true">PRINTS: group of conserved motifs (fingerprints) used to characterise a protein family</option>
74 <option value="PIRSR" selected="true">PIRSR: protein families based on hidden Markov models (HMMs) and Site Rules</option>
75 <option value="PrositePatterns" selected="true">PROSITE Pattern: protein domains, families and functional sites as well as associated patterns to identify them</option>
76 <option value="Pfam" selected="true">Pfam: protein families, each represented by multiple sequence alignments and hidden Markov models</option>
77 <option value="MobiDBLite" selected="true">MobiDBLite: Prediction of intrinsically disordered regions in proteins</option>
78 <option value="PIRSF" selected="true">PIRSF: non-overlapping clustering of UniProtKB sequences into a hierarchical order (evolutionary relationships)</option>
79 </param>
80
81 <conditional name="licensed">
82 <param name="use" type="select" label="Use applications with restricted license, only for non-commercial use?" help="The corresponding tools must be installed manually by the administrator of this Galaxy instance" >
83 <option value="false" selected="true">No</option>
84 <option value="true">Yes</option>
85 </param>
86 <when value="false" />
87 <when value="true">
88 <param name="applications_licensed" type="select" multiple="True" label="Applications to run" help="Select your program.">
89 <option value="Phobius" selected="true">Phobius: combined transmembrane topology and signal peptide predictor</option>
90 <option value="SignalP_GRAM_NEGATIVE" selected="false">SignalP (gram-negative): signal peptide cleavage sites in amino acid sequences for gram-negative prokaryotes</option>
91 <option value="SignalP_EUK" selected="true">SignalP (eukaryotes): signal peptide cleavage sites in amino acid sequences for eukaryotes</option>
92 <option value="SignalP_GRAM_POSITIVE" selected="false">SignalP (Gram Positive Bacteria): signal peptide cleavage sites in amino acid sequences for gram-positive prokaryotes</option>
93 <option value="TMHMM" selected="true">TMHMM: Prediction of transmembrane helices in proteins</option>
94 </param>
95 </when>
96 </conditional>
97
98 <param argument="--pathways" truevalue="--pathways" falsevalue="" checked="True" type="boolean" label="Include pathway information"
99 help="Option that provides mappings from matches to pathway information, which is based on the matched manually curated InterPro entries."/>
100 <param argument="--goterms" truevalue="--goterms" falsevalue="" checked="True" type="boolean" label="Include Gene Ontology (GO) mappings"
101 help="Look up of corresponding Gene Ontology annotation. Implies -iprlookup option."/>
102 <param argument="--iprlookup" truevalue="--iprlookup" falsevalue="" checked="False" type="boolean"
103 label="Provide additional mappings" help="Provide mappings from matched member database signatures to the InterPro entries that they are integrated into"/>
104
105 <param name="oformat" type="select" multiple="true" label="Output format" help="Please select an output format (JSON output can be visualised on https://www.ebi.ac.uk/interpro/result/InterProScan/).">
106 <option value="TSV" selected="true">Tab-separated values format (TSV)</option>
107 <option value="GFF3">GFF3</option>
108 <option value="XML">XML</option>
109 <option value="JSON">JSON</option>
110 </param>
111 </inputs>
112
113 <outputs> <!-- each dataset is only created when its format is selected in the "oformat" parameter -->
114 <data format="tabular" name="outfile_tsv" from_work_dir="output.tsv" label="InterProScan on ${on_string} (tsv)">
115 <filter>oformat and 'TSV' in oformat</filter>
116 </data>
117 <data format="xml" name="outfile_xml" from_work_dir="output.xml" label="InterProScan on ${on_string} (xml)">
118 <filter>oformat and 'XML' in oformat</filter>
119 </data>
120 <data format="gff3" name="outfile_gff3" from_work_dir="output.gff3" label="InterProScan on ${on_string} (gff3)">
121 <filter>oformat and 'GFF3' in oformat</filter>
122 </data>
123 <data format="json" name="outfile_json" from_work_dir="output.json" label="InterProScan on ${on_string} (json)">
124 <filter>oformat and 'JSON' in oformat</filter>
125 </data>
126 </outputs>
127
128 <tests>
129 <test>
130 <param name="input" value="prots.fa" />
131 <param name="seqtype" value="p" />
132 <param name="database" value="5.52-86.0" />
133 <param name="applications" value="MobiDBLite" />
134 <param name="oformat" value="TSV" />
135 <output name="outfile_tsv">
136 <assert_contents>
137 <has_text text="FUN_000011-T1" />
138 <has_text text="ea9924e11f7decc417e8d9ed8b9c682e" />
139 <has_text text="FUN_000012-T1" />
140 <has_text text="01beedc2fbf8012cba37f0c0d39aa071" />
141 </assert_contents>
142 </output>
143 </test>
144 <test>
145 <param name="input" value="prots.fa" />
146 <param name="seqtype" value="p" />
147 <param name="database" value="5.52-86.0" />
148 <param name="applications" value="MobiDBLite" />
149 <param name="oformat" value="TSV,GFF3,XML,JSON" />
150 <output name="outfile_tsv">
151 <assert_contents>
152 <has_text text="FUN_000011-T1" />
153 <has_text text="ea9924e11f7decc417e8d9ed8b9c682e" />
154 <has_text text="FUN_000012-T1" />
155 <has_text text="01beedc2fbf8012cba37f0c0d39aa071" />
156 </assert_contents>
157 </output>
158 <output name="outfile_xml">
159 <assert_contents>
160 <has_text text="mobidblite-location" />
161 <has_text text="Polyampholyte" />
162 <has_text text="consensus disorder prediction" />
163 <has_text text="FUN_000011-T1 FUN_000011" />
164 </assert_contents>
165 </output>
166 <output name="outfile_gff3">
167 <assert_contents>
168 <has_text text="protein_match" />
169 <has_text text="ID=FUN_000011-T1;md5=" />
170 <has_text text="MobiDBLite" />
171 </assert_contents>
172 </output>
173 <output name="outfile_json">
174 <assert_contents>
175 <has_text text="signatureLibraryRelease" />
176 <has_text text="disorder_prediction" />
177 <has_text text="Polyampholyte" />
178 </assert_contents>
179 </output>
180 </test>
181 <test>
182 <param name="input" value="transcripts.fa" />
183 <param name="seqtype" value="n" />
184 <param name="database" value="5.52-86.0" />
185 <param name="applications" value="MobiDBLite" />
186 <param name="oformat" value="TSV,GFF3,XML,JSON" />
187 <output name="outfile_tsv">
188 <assert_contents>
189 <has_text text="FUN_000018-T1_orf336" />
190 <has_text text="0b28fe115d4cc09260b038b19fb0b21d" />
191 <has_text text="FUN_000012-T1_orf133" />
192 <has_text text="01beedc2fbf8012cba37f0c0d39aa071" />
193 </assert_contents>
194 </output>
195 <output name="outfile_xml">
196 <assert_contents>
197 <has_text text="mobidblite-location" />
198 <has_text text="Polyampholyte" />
199 <has_text text="consensus disorder prediction" />
200 <has_text text="orf355" />
201 </assert_contents>
202 </output>
203 <output name="outfile_gff3">
204 <assert_contents>
205 <has_text text="protein_match" />
206 <has_text text="ID=FUN_000012-T1;" />
207 <has_text text="MobiDBLite" />
208 </assert_contents>
209 </output>
210 <output name="outfile_json">
211 <assert_contents>
212 <has_text text="signatureLibraryRelease" />
213 <has_text text="disorder_prediction" />
214 <has_text text="Polyampholyte" />
215 </assert_contents>
216 </output>
217 </test>
218 <test expect_failure="true">
219 <param name="input" value="prots.fa" />
220 <param name="seqtype" value="p" />
221 <param name="database" value="5.52-86.0" />
222 <param name="applications" value="MobiDBLite" />
223 <conditional name="licensed">
224 <param name="use" value="true" />
225 <param name="applications_licensed" value="Phobius,TMHMM" />
226 </conditional>
227 <param name="oformat" value="TSV" />
228 <assert_stdout>
229 <!-- Phobius and TMHMM are licensed applications that are not installed by default, so the job is expected to fail and report both as "deactivated" -->
230 <has_text text="Analysis Phobius does not exist or is deactivated" />
231 <has_text text="Analysis TMHMM does not exist or is deactivated" />
232 </assert_stdout>
233 </test>
234 </tests>
235
236 <help><![CDATA[
237
238 **What it does**
239
240 InterProScan is a batch tool to query the InterPro database. It provides annotations based on multiple searches of profile and other functional databases.
241
242 Phobius (licensed software), SignalP, SMART (licensed components) and TMHMM use
243 licensed code and data provided by third parties. If you wish to run these
244 analyses it will be necessary for you to obtain a licence from the vendor and
245 configure the Galaxy server InterProScan installation to use them.
246
247 **Input**
248
249 Required is a FASTA file containing protein or nucleotide sequences.
250
251 **Output**
252
253 In this version of InterProScan, you can retrieve output in any of the following four formats:
254
255 * TSV: tab-separated values format
256 * XML: XML format
257 * GFF: The GFF 3.0 format
258 * JSON: A JSON representation of the protein matches that can be visualised on https://www.ebi.ac.uk/interpro/result/InterProScan/
259
260 **Example Output**
261
262
263 ::
264
265 P51587 14086411a2cdf1c4cba63020e1622579 3418 Pfam PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 2670 2799 7.9E-43 T 15-03-2013
266 P51587 14086411a2cdf1c4cba63020e1622579 3418 ProSiteProfiles PS50138 BRCA2 repeat profile. 1002 1036 0.0 T 18-03-2013 IPR002093 BRCA2 repeat GO:0005515|GO:0006302
267 P51587 14086411a2cdf1c4cba63020e1622579 3418 Gene3D G3DSA:2.40.50.140 2966 3051 3.1E-52 T 15-03-2013
268 ...
269
270
271 The TSV format presents the match data in columns as follows:
272
273 - Protein Accession (e.g. P51587)
274 - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579)
275 - Sequence Length (e.g. 3418)
276 - Analysis (e.g. Pfam / PRINTS / Gene3D)
277 - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140)
278 - Signature Description (e.g. BRCA2 repeat profile)
279 - Start location
280 - Stop location
281 - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52)
282 - Status - is the status of the match (T: true)
283 - Date - is the date of the run
284 - (InterPro annotations - accession (e.g. IPR002093) - optional column; only displayed if --iprlookup option is switched on)
285 - (InterPro annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if --iprlookup option is switched on)
286 - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on)
287 - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on)
288
289
290 **Extensible Markup Language (XML)**
291
292 XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available at http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5.
293
294 **Generic Feature Format Version 3 (GFF3)**
295
296 The GFF3 format is a flat tab-delimited file, which is much richer than the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. You will find documentation of all the columns and attributes used at https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md.
297
298 **Example Output**
299
300
301 ::
302
303 ##gff-version 3
304 ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269
305 ##sequence-region AACH01000027 1 1347
306 ##seqid|source|type|start|end|score|strand|phase|attributes
307 AACH01000027 provided_by_user nucleic_acid 1 1347 . + . Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027
308 AACH01000027 getorf ORF 1 1347 . + . Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347
309 AACH01000027 getorf polypeptide 1 449 . + . md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347
310 AACH01000027 Pfam protein_match 84 314 1.2E-45 + . Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13"
311 ##sequence-region 2
312 ...
313 >pep_AACH01000027_1_1347
314 LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV
315 LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA
316 GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI
317 LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ
318 ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA
319 TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV
320 DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML
321 RSQKAKGVLIYRDDWISITPEIQLLFTEF
322 ...
323 >match$8_84_314
324 KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK
325 RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL
326 LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR
327 AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS
328
329 ]]></help>
330
331 <expand macro="citations" />
332 </tool>