Mercurial > repos > iuc > interproscan
comparison interproscan.xml @ 0:bfeae84e23ee draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/interproscan commit 2f5d27a375fcc2e8d77914b3d9e402a9e2df2d97"
| author | iuc |
|---|---|
| date | Mon, 15 Nov 2021 17:20:51 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bfeae84e23ee |
|---|---|
| 1 <tool id="interproscan" name="InterProScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09"> | |
| 2 <description>functional annotation</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <xrefs> | |
| 7 <xref type="bio.tools">interproscan_4</xref> | |
| 8 </xrefs> | |
| 9 <requirements> | |
| 10 <requirement type="package" version="@TOOL_VERSION@">interproscan</requirement> | |
| 11 </requirements> | |
| 12 <version_command>interproscan.sh --version</version_command> | |
| 13 <command><![CDATA[ | |
| 14 ## Adapt properties file to use data from data table | |
| 15 mkdir -p \$HOME/.interproscan-5 | |
| 16 && | |
| 17 sed 's|^\(data.directory=\).*$|\1${database.fields.path}|' \$(dirname \$(readlink -f \$(command -v interproscan.sh)))/interproscan.properties > \$HOME/.interproscan-5/interproscan.properties | |
| 18 && | |
| 19 | |
| 20 ## Now run interproscan | |
| 21 interproscan.sh | |
| 22 | |
| 23 ## disables the precalculated lookup service, all calculation will be run locally | |
| 24 -dp | |
| 25 --input '$input' | |
| 26 --seqtype $seqtype | |
| 27 -f ${','.join($oformat)} | |
| 28 | |
| 29 #if $licensed.use == 'true' and $licensed.applications_licensed: | |
| 30 --applications ${','.join($applications)},${','.join($licensed.applications_licensed)} | |
| 31 #else: | |
| 32 --applications ${','.join($applications)} | |
| 33 #end if | |
| 34 --tempdir \$TEMP | |
| 35 | |
| 36 $pathways | |
| 37 $goterms | |
| 38 $iprlookup | |
| 39 | |
| 40 --cpu \${GALAXY_SLOTS:-4} | |
| 41 | |
| 42 --output-file-base 'output' | |
| 43 ]]></command> | |
| 44 <inputs> | |
| 45 <param argument="--input" type="data" format="fasta" label="Protein or nucleotide FASTA file"/> | |
| 46 | |
| 47 <param argument="--seqtype" type="select" label="Type of the input sequences" help=""> | |
| 48 <option value="p" selected="true">Protein</option> | |
| 49 <option value="n">DNA / RNA</option> | |
| 50 </param> | |
| 51 | |
| 52 <param name="database" label="InterProScan database" type="select"> | |
| 53 <options from_data_table="interproscan"> | |
| 54 <column name="value" index="0" /> | |
| 55 <column name="name" index="1" /> | |
| 56 <column name="path" index="3" /> | |
| 57 <filter type="sort_by" column="0" /> | |
| 58 <filter type="static_value" column="2" value="@TOOL_VERSION@" /> | |
| 59 </options> | |
| 60 </param> | |
| 61 | |
| 62 <param name="applications" type="select" multiple="True" label="Applications to run" help="Select your program"> | |
| 63 <option value="TIGRFAM" selected="true">TIGRFAM: protein families based on hidden Markov models (HMMs)</option> | |
| 64 <option value="SFLD" selected="true">SFLD: a database of protein families based on hidden Markov models (HMMs)</option> | |
| 65 <option value="SUPERFAMILY" selected="true">SUPERFAMILY: database of structural and functional annotation for all proteins and genomes</option> | |
| 66 <option value="PANTHER" selected="true">PANTHER: Protein ANalysis THrough Evolutionary Relationships</option> | |
| 67 <option value="Gene3D" selected="true">Gene3d: Structural assignment for whole genes and genomes using the CATH domain structure database</option> | |
| 68 <option value="Hamap" selected="true">HAMAP: High-quality Automated Annotation of Microbial Proteomes</option> | |
| 69 <option value="PrositeProfiles" selected="true">PROSITE Profiles: protein domains, families and functional sites as well as associated profiles to identify them</option> | |
| 70 <option value="Coils" selected="true">Coils: Prediction of Coiled Coil Regions in Proteins</option> | |
| 71 <option value="SMART" selected="true">SMART: identification and analysis of domain architectures based on Hidden Markov Models or HMMs</option> | |
| 72 <option value="CDD" selected="true">CDD: protein domains and families based on well-annotated multiple sequence alignment models</option> | |
| 73 <option value="PRINTS" selected="true">PRINTS: group of conserved motifs (fingerprints) used to characterise a protein family</option> | |
| 74 <option value="PIRSR" selected="true">PIRSR: protein families based on hidden Markov models (HMMs) and Site Rules</option> | |
| 75 <option value="PrositePatterns" selected="true">PROSITE Pattern: protein domains, families and functional sites as well as associated patterns to identify them</option> | |
| 76 <option value="Pfam" selected="true">Pfam: protein families, each represented by multiple sequence alignments and hidden Markov models</option> | |
| 77 <option value="MobiDBLite" selected="true">MobiDBLite: Prediction of intrinsically disordered regions in proteins</option> | |
| 78 <option value="PIRSF" selected="true">PIRSF: non-overlapping clustering of UniProtKB sequences into a hierarchical order (evolutionary relationships)</option> | |
| 79 </param> | |
| 80 | |
| 81 <conditional name="licensed"> | |
| 82 <param name="use" type="select" label="Use applications with restricted license, only for non-commercial use?" help="The corresponding tools must be installed manually by the administrator of this Galaxy instance" > | |
| 83 <option value="false" selected="true">No</option> | |
| 84 <option value="true">Yes</option> | |
| 85 </param> | |
| 86 <when value="false" /> | |
| 87 <when value="true"> | |
| 88 <param name="applications_licensed" type="select" multiple="True" label="Applications to run" help="Select your program"> | |
| 89 <option value="Phobius" selected="true">Phobius: combined transmembrane topology and signal peptide predictor</option> | |
| 90 <option value="SignalP_GRAM_NEGATIVE" selected="false">SignalP (gram-negative): signal peptide cleavage sites in amino acid sequences for gram-negative prokaryotes</option> | |
| 91 <option value="SignalP_EUK" selected="true">SignalP (eukaryotes): signal peptide cleavage sites in amino acid sequences for eukaryotes</option> | |
| 92 <option value="SignalP_GRAM_POSITIVE" selected="false">SignalP (Gram Positive Bacteria): signal peptide cleavage sites in amino acid sequences for gram-positive prokaryotes</option> | |
| 93 <option value="TMHMM" selected="true">TMHMM: Prediction of transmembrane helices in proteins</option> | |
| 94 </param> | |
| 95 </when> | |
| 96 </conditional> | |
| 97 | |
| 98 <param argument="--pathways" truevalue="--pathways" falsevalue="" checked="True" type="boolean" label="Include pathway information" | |
| 99 help="Option that provides mappings from matches to pathway information, which is based on the matched manually curated InterPro entries."/> | |
| 100 <param argument="--goterms" truevalue="--goterms" falsevalue="" checked="True" type="boolean" label="Include Gene Ontology (GO) mappings" | |
| 101 help="Look up of corresponding Gene Ontology annotation. Implies -iprlookup option."/> | |
| 102 <param argument="--iprlookup" truevalue="--iprlookup" falsevalue="" checked="False" type="boolean" | |
| 103 label="Provide additional mappings" help="Provide mappings from matched member database signatures to the InterPro entries that they are integrated into"/> | |
| 104 | |
| 105 <param name="oformat" type="select" multiple="true" label="Output format" help="Please select a output format (JSON output can be visualised on https://www.ebi.ac.uk/interpro/result/InterProScan/)."> | |
| 106 <option value="TSV" selected="true">Tab-separated values format (TSV)</option> | |
| 107 <option value="GFF3">GFF3</option> | |
| 108 <option value="XML">XML</option> | |
| 109 <option value="JSON">JSON</option> | |
| 110 </param> | |
| 111 </inputs> | |
| 112 | |
| 113 <outputs> <!-- one dataset per format chosen in the "oformat" select; each is collected from output.EXT in the working directory --> | |
| 114 <data format="tabular" name="outfile_tsv" from_work_dir="output.tsv" label="InterProScan on ${on_string} (tsv)"> | |
| 115 <filter>oformat and 'TSV' in oformat</filter> | |
| 116 </data> | |
| 117 <data format="xml" name="outfile_xml" from_work_dir="output.xml" label="InterProScan on ${on_string} (xml)"> | |
| 118 <filter>oformat and 'XML' in oformat</filter> | |
| 119 </data> | |
| 120 <data format="gff3" name="outfile_gff3" from_work_dir="output.gff3" label="InterProScan on ${on_string} (gff3)"> | |
| 121 <filter>oformat and 'GFF3' in oformat</filter> | |
| 122 </data> | |
| 123 <data format="json" name="outfile_json" from_work_dir="output.json" label="InterProScan on ${on_string} (json)"> | |
| 124 <filter>oformat and 'JSON' in oformat</filter> | |
| 125 </data> | |
| 126 </outputs> | |
| 127 | |
| 128 <tests> | |
| 129 <test> | |
| 130 <param name="input" value="prots.fa" /> | |
| 131 <param name="seqtype" value="p" /> | |
| 132 <param name="database" value="5.52-86.0" /> | |
| 133 <param name="applications" value="MobiDBLite" /> | |
| 134 <param name="oformat" value="TSV" /> | |
| 135 <output name="outfile_tsv"> | |
| 136 <assert_contents> | |
| 137 <has_text text="FUN_000011-T1" /> | |
| 138 <has_text text="ea9924e11f7decc417e8d9ed8b9c682e" /> | |
| 139 <has_text text="FUN_000012-T1" /> | |
| 140 <has_text text="01beedc2fbf8012cba37f0c0d39aa071" /> | |
| 141 </assert_contents> | |
| 142 </output> | |
| 143 </test> | |
| 144 <test> | |
| 145 <param name="input" value="prots.fa" /> | |
| 146 <param name="seqtype" value="p" /> | |
| 147 <param name="database" value="5.52-86.0" /> | |
| 148 <param name="applications" value="MobiDBLite" /> | |
| 149 <param name="oformat" value="TSV,GFF3,XML,JSON" /> | |
| 150 <output name="outfile_tsv"> | |
| 151 <assert_contents> | |
| 152 <has_text text="FUN_000011-T1" /> | |
| 153 <has_text text="ea9924e11f7decc417e8d9ed8b9c682e" /> | |
| 154 <has_text text="FUN_000012-T1" /> | |
| 155 <has_text text="01beedc2fbf8012cba37f0c0d39aa071" /> | |
| 156 </assert_contents> | |
| 157 </output> | |
| 158 <output name="outfile_xml"> | |
| 159 <assert_contents> | |
| 160 <has_text text="mobidblite-location" /> | |
| 161 <has_text text="Polyampholyte" /> | |
| 162 <has_text text="consensus disorder prediction" /> | |
| 163 <has_text text="FUN_000011-T1 FUN_000011" /> | |
| 164 </assert_contents> | |
| 165 </output> | |
| 166 <output name="outfile_gff3"> | |
| 167 <assert_contents> | |
| 168 <has_text text="protein_match" /> | |
| 169 <has_text text="ID=FUN_000011-T1;md5=" /> | |
| 170 <has_text text="MobiDBLite" /> | |
| 171 </assert_contents> | |
| 172 </output> | |
| 173 <output name="outfile_json"> | |
| 174 <assert_contents> | |
| 175 <has_text text="signatureLibraryRelease" /> | |
| 176 <has_text text="disorder_prediction" /> | |
| 177 <has_text text="Polyampholyte" /> | |
| 178 </assert_contents> | |
| 179 </output> | |
| 180 </test> | |
| 181 <test> | |
| 182 <param name="input" value="transcripts.fa" /> | |
| 183 <param name="seqtype" value="n" /> | |
| 184 <param name="database" value="5.52-86.0" /> | |
| 185 <param name="applications" value="MobiDBLite" /> | |
| 186 <param name="oformat" value="TSV,GFF3,XML,JSON" /> | |
| 187 <output name="outfile_tsv"> | |
| 188 <assert_contents> | |
| 189 <has_text text="FUN_000018-T1_orf336" /> | |
| 190 <has_text text="0b28fe115d4cc09260b038b19fb0b21d" /> | |
| 191 <has_text text="FUN_000012-T1_orf133" /> | |
| 192 <has_text text="01beedc2fbf8012cba37f0c0d39aa071" /> | |
| 193 </assert_contents> | |
| 194 </output> | |
| 195 <output name="outfile_xml"> | |
| 196 <assert_contents> | |
| 197 <has_text text="mobidblite-location" /> | |
| 198 <has_text text="Polyampholyte" /> | |
| 199 <has_text text="consensus disorder prediction" /> | |
| 200 <has_text text="orf355" /> | |
| 201 </assert_contents> | |
| 202 </output> | |
| 203 <output name="outfile_gff3"> | |
| 204 <assert_contents> | |
| 205 <has_text text="protein_match" /> | |
| 206 <has_text text="ID=FUN_000012-T1;" /> | |
| 207 <has_text text="MobiDBLite" /> | |
| 208 </assert_contents> | |
| 209 </output> | |
| 210 <output name="outfile_json"> | |
| 211 <assert_contents> | |
| 212 <has_text text="signatureLibraryRelease" /> | |
| 213 <has_text text="disorder_prediction" /> | |
| 214 <has_text text="Polyampholyte" /> | |
| 215 </assert_contents> | |
| 216 </output> | |
| 217 </test> | |
| 218 <test expect_failure="true"> | |
| 219 <param name="input" value="prots.fa" /> | |
| 220 <param name="seqtype" value="p" /> | |
| 221 <param name="database" value="5.52-86.0" /> | |
| 222 <param name="applications" value="MobiDBLite" /> | |
| 223 <conditional name="licensed"> | |
| 224 <param name="use" value="true" /> | |
| 225 <param name="applications_licensed" value="Phobius,TMHMM" /> | |
| 226 </conditional> | |
| 227 <param name="oformat" value="TSV" /> | |
| 228 <assert_stdout> | |
| 229 <!-- expected to be "deactivated" as they are not installed by default --> | |
| 230 <has_text text="Analysis Phobius does not exist or is deactivated" /> | |
| 231 <has_text text="Analysis TMHMM does not exist or is deactivated" /> | |
| 232 </assert_stdout> | |
| 233 </test> | |
| 234 </tests> | |
| 235 | |
| 236 <help><![CDATA[ | |
| 237 | |
| 238 **What it does** | |
| 239 | |
| 240 InterProScan is a batch tool to query the InterPro database. It provides annotations based on multiple searches of profile and other functional databases. | |
| 241 | |
| 242 Phobius (licensed software), SignalP, SMART (licensed components) and TMHMM use | |
| 243 licensed code and data provided by third parties. If you wish to run these | |
| 244 analyses it will be necessary for you to obtain a licence from the vendor and | |
| 245 configure the Galaxy server InterProScan installation to use them. | |
| 246 | |
| 247 **Input** | |
| 248 | |
| 249 Required is a FASTA file containing protein or nucleotide sequences. | |
| 250 | |
| 251 **Output** | |
| 252 | |
| 253 In this version of InterProScan, you can retrieve output in any of the following four formats: | |
| 254 | |
| 255 * TSV: tab-separated values format | |
| 256 * XML: XML format | |
| 257 * GFF: The GFF 3.0 format | |
| 258 * JSON: A JSON representation of the protein matches that can be visualised on https://www.ebi.ac.uk/interpro/result/InterProScan/ | |
| 259 | |
| 260 **Example Output** | |
| 261 | |
| 262 | |
| 263 :: | |
| 264 | |
| 265 P51587 14086411a2cdf1c4cba63020e1622579 3418 Pfam PF09103 BRCA2, oligonucleotide/oligosaccharide-binding, domain 1 2670 2799 7.9E-43 T 15-03-2013 | |
| 266 P51587 14086411a2cdf1c4cba63020e1622579 3418 ProSiteProfiles PS50138 BRCA2 repeat profile. 1002 1036 0.0 T 18-03-2013 IPR002093 BRCA2 repeat GO:0005515|GO:0006302 | |
| 267 P51587 14086411a2cdf1c4cba63020e1622579 3418 Gene3D G3DSA:2.40.50.140 2966 3051 3.1E-52 T 15-03-2013 | |
| 268 ... | |
| 269 | |
| 270 | |
| 271 The TSV format presents the match data in columns as follows: | |
| 272 | |
| 273 - Protein Accession (e.g. P51587) | |
| 274 - Sequence MD5 digest (e.g. 14086411a2cdf1c4cba63020e1622579) | |
| 275 - Sequence Length (e.g. 3418) | |
| 276 - Analysis (e.g. Pfam / PRINTS / Gene3D) | |
| 277 - Signature Accession (e.g. PF09103 / G3DSA:2.40.50.140) | |
| 278 - Signature Description (e.g. BRCA2 repeat profile) | |
| 279 - Start location | |
| 280 - Stop location | |
| 281 - Score - is the e-value of the match reported by member database method (e.g. 3.1E-52) | |
| 282 - Status - is the status of the match (T: true) | |
| 283 - Date - is the date of the run | |
| 284 - (InterPro annotations - accession (e.g. IPR002093) - optional column; only displayed if --iprlookup option is switched on) | |
| 285 - (InterPro annotations - description (e.g. BRCA2 repeat) - optional column; only displayed if --iprlookup option is switched on) | |
| 286 - (GO annotations (e.g. GO:0005515) - optional column; only displayed if --goterms option is switched on) | |
| 287 - (Pathways annotations (e.g. REACT_71) - optional column; only displayed if --pathways option is switched on) | |
| 288 | |
| 289 | |
| 290 **Extensible Markup Language (XML)** | |
| 291 | |
| 292 XML representation of the matches - this is the richest form of the data. The XML Schema Definition (XSD) is available `here <http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5>`_. | |
| 293 | |
| 294 **Generic Feature Format Version 3 (GFF3)** | |
| 295 | |
| 296 The GFF3 format is a flat tab-delimited file, which is much richer than the TSV output format. It allows you to trace back from matches to predicted proteins and to nucleic acid sequences. It also contains a FASTA format representation of the predicted protein sequences and their matches. You will find documentation of all the columns and attributes used at https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md. | |
| 297 | |
| 298 **Example Output** | |
| 299 | |
| 300 | |
| 301 :: | |
| 302 | |
| 303 ##gff-version 3 | |
| 304 ##feature-ontology http://song.cvs.sourceforge.net/viewvc/song/ontology/sofa.obo?revision=1.269 | |
| 305 ##sequence-region AACH01000027 1 1347 | |
| 306 ##seqid|source|type|start|end|score|strand|phase|attributes | |
| 307 AACH01000027 provided_by_user nucleic_acid 1 1347 . + . Name=AACH01000027;md5=b2a7416cb92565c004becb7510f46840;ID=AACH01000027 | |
| 308 AACH01000027 getorf ORF 1 1347 . + . Name=AACH01000027.2_21;Target=pep_AACH01000027_1_1347 1 449;md5=b2a7416cb92565c004becb7510f46840;ID=orf_AACH01000027_1_1347 | |
| 309 AACH01000027 getorf polypeptide 1 449 . + . md5=fd0743a673ac69fb6e5c67a48f264dd5;ID=pep_AACH01000027_1_1347 | |
| 310 AACH01000027 Pfam protein_match 84 314 1.2E-45 + . Name=PF00696;signature_desc=Amino acid kinase family;Target=null 84 314;status=T;ID=match$8_84_314;Ontology_term="GO:0008652";date=15-04-2013;Dbxref="InterPro:IPR001048","Reactome:REACT_13" | |
| 311 ##sequence-region 2 | |
| 312 ... | |
| 313 >pep_AACH01000027_1_1347 | |
| 314 LVLLAAFDCIDDTKLVKQIIISEIINSLPNIVNDKYGRKVLLYLLSPRDPAHTVREIIEV | |
| 315 LQKGDGNAHSKKDTEIRRREMKYKRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEA | |
| 316 GHELILVSSGAIAAGFGALGFKKRPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQI | |
| 317 LLTQDDFVDKRRYKNAHQALSVLLNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQ | |
| 318 ADLLVFLTDVDGLYTGNPNSDPRAKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAA | |
| 319 TIATESGVPVYICSSLKSDSMIEAAEETEDGSYFVAQEKGLRTQKQWLAFYAQSQGSIWV | |
| 320 DKGAAEALSQYGKSLLLSGIVEAEGVFSYGDIVTVFDKESGKSLGKGRVQFGASALEDML | |
| 321 RSQKAKGVLIYRDDWISITPEIQLLFTEF | |
| 322 ... | |
| 323 >match$8_84_314 | |
| 324 KRIVFKVGTSSLTNEDGSLSRSKVKDITQQLAMLHEAGHELILVSSGAIAAGFGALGFKK | |
| 325 RPTKIADKQASAAVGQGLLLEEYTTNLLLRQIVSAQILLTQDDFVDKRRYKNAHQALSVL | |
| 326 LNRGAIPIINENDSVVIDELKVGDNDTLSAQVAAMVQADLLVFLTDVDGLYTGNPNSDPR | |
| 327 AKRLERIETINREIIDMAGGAGSSNGTGGMLTKIKAATIATESGVPVYICS | |
| 328 | |
| 329 ]]></help> | |
| 330 | |
| 331 <expand macro="citations" /> | |
| 332 </tool> |
