comparison alphafold.xml @ 0:67c179acafdd draft default tip

"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86-dirty"
author galaxy-australia
date Thu, 03 Mar 2022 02:54:20 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:67c179acafdd
1 <tool id="alphafold" name="alphafold" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>Alphafold v2.0: AI-guided 3D structure prediction of proteins</description>
3 <macros> <!-- Tokens substituted into the version attribute of the enclosing tool element. -->
4 <token name="@TOOL_VERSION@">2.0.0</token> <!-- presumably the wrapped AlphaFold release; matches "v2.0" in the description -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- appended after "+galaxy" in the tool version string -->
6 </macros>
7 <edam_topics>
8 <edam_topic>topic_0082</edam_topic> <!-- presumably EDAM "Structure prediction"; TODO confirm against the EDAM ontology -->
9 </edam_topics>
10 <edam_operations>
11 <edam_operation>operation_0474</edam_operation> <!-- presumably EDAM "Protein structure prediction"; TODO confirm -->
12 </edam_operations>
13 <xrefs>
14 <xref type="bio.tools">alphafold_2.0</xref>
15 </xrefs>
16 <requirements>
17 <container type="docker">neoformit/alphafold:latest</container> <!-- NOTE(review): ":latest" is a floating tag, so the image can change while the tool version stays at 2.0.0; consider pinning a fixed tag -->
18 </requirements>
19 <command detect_errors="exit_code"><![CDATA[
20 ## Steps below are chained with "&&", so the first failing step stops the chain.
21 ## $ALPHAFOLD_DB variable should point to the location of the AlphaFold
22 ## databases - defaults to /data
23 ## (the default comes from the shell ":-/data" fallback used on every database path in the run step)
24 ## fasta setup ----------------------------
25 #if $fasta_or_text.input_mode == 'history':
26 cp '$fasta_or_text.fasta_file' input.fasta &&
27
28 #elif $fasta_or_text.input_mode == 'textbox':
29 echo '$fasta_or_text.fasta_text' > input.fasta &&
30 #end if
31 ## NOTE(review): the run step reads alphafold.fasta, not input.fasta. Presumably validate_fasta.py writes the validated sequence to alphafold.fasta; confirm against that script.
32 python3 '$__tool_directory__/validate_fasta.py' input.fasta &&
33
34 ## env vars -------------------------------
35 export TF_FORCE_UNIFIED_MEMORY=1 &&
36 export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 &&
37 export DATE=`date +"%Y-%m-%d"` &&
38 ## DATE holds today's date and is passed below as the max_template_date value.
39 ## run alphafold -------------------------
40 python /app/alphafold/run_alphafold.py
41 --fasta_paths alphafold.fasta
42 --output_dir output
43 --data_dir \${ALPHAFOLD_DB:-/data}
44 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta
45 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa
46 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70
47 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files
48 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat
49 --max_template_date=\$DATE
50 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
51 --uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08
52 --use_gpu_relax=True
53 &&
54
55 ## Uncomment for "dummy run" - skip alphafold run and read output from test-data
56 ## cp -r '$__tool_directory__/output' . &&
57
58 ## Generate additional outputs ------------
59 python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts &&
60 ## $output_plddts renders as the plddts flag or as nothing (see the boolean input), so it is left unquoted on purpose.
61 ## HTML output
62 mkdir -p '${ html.files_path }' &&
63 cp '$__tool_directory__/alphafold.html' '${html}' &&
64 cp output/alphafold/ranked_*.pdb '${html.files_path}'
65
66 ]]></command>
67 <inputs> <!-- Sequence source (history dataset or pasted text) plus a switch for the per-residue score output. -->
68 <conditional name="fasta_or_text">
69 <param name="input_mode" type="select" label="Fasta Input" help="Single protein sequence to fold. Input can be fasta file from history, or text. Provide only 1 sequence per job.">
70 <option value="history">Use fasta from history</option>
71 <option value="textbox">Paste sequence into textbox</option>
72 </param>
73 <when value="history">
74 <param name="fasta_file" type="data" format="fasta" label="Fasta file from history" help="Select single fasta protein sequence from your history. If you wish to fold multiple proteins, submit an individual job for each protein." />
75 </when>
76 <when value="textbox">
77 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein."><validator type="empty_field" message="Please paste a protein sequence, or choose a fasta file from your history instead." /></param> <!-- empty_field rejects a blank textbox at submission time instead of letting the job fail after it has been queued -->
78 </when>
79 </conditional>
80 <param name="output_plddts" type="boolean" checked="false" label="Output per-residue confidence scores" truevalue="--plddts" falsevalue="" help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. Sections of low confidence often occur in disordered regions. " /> <!-- truevalue/falsevalue are inserted verbatim into the gen_extra_outputs.py call in the command -->
81 </inputs>
82 <outputs> <!-- Five ranked models, the model confidence table, the optional per-residue table and an HTML viewer page. -->
83 <data name="model5" label="${tool.name} on ${on_string}: Model 5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb"/> <!-- ranked_N.pdb is exposed as Model N+1 -->
84 <data name="model4" label="${tool.name} on ${on_string}: Model 4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb"/>
85 <data name="model3" label="${tool.name} on ${on_string}: Model 3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb"/>
86 <data name="model2" label="${tool.name} on ${on_string}: Model 2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb"/>
87 <data name="model1" label="${tool.name} on ${on_string}: Model 1" format="pdb" from_work_dir="output/alphafold/ranked_0.pdb"/>
88 <data name="confidence_scores" label="${tool.name} on ${on_string}: Model confidence scores" format="tsv" from_work_dir="output/alphafold/model_confidence_scores.tsv"/>
89 <data name="plddts" label="${tool.name} on ${on_string}: Per-residue confidence scores (plddts)" format="tsv" from_work_dir="output/alphafold/plddts.tsv"> <!-- only created when the output_plddts input is checked -->
90 <filter>(output_plddts)</filter>
91 </data>
92 <data name="html" label="${tool.name} on ${on_string}: Visualization" format="html"/> <!-- no from_work_dir: the command copies alphafold.html straight to this dataset -->
93 </outputs>
94 <tests> <!-- Single end-to-end test: fold test1.fasta from history with per-residue scores enabled. -->
95 <test expect_num_outputs="8"> <!-- 8 = five models + confidence_scores + plddts + html -->
96 <conditional name="fasta_or_text">
97 <param name="input_mode" value="history"/>
98 <param name="fasta_file" value="test1.fasta"/>
99 </conditional>
100 <param name="output_plddts" value="true"/> <!-- switches on the filtered plddts output so it can be asserted below -->
101 <output name="plddts">
102 <assert_contents>
103 <has_n_columns n="2"/>
104 <has_n_lines n="6"/> <!-- presumably a header plus one row per model; TODO confirm against gen_extra_outputs.py -->
105 <has_size value="2900" delta="300"/> <!-- size in bytes, accepted within value plus or minus delta -->
106 </assert_contents>
107 </output>
108 <output name="confidence_scores">
109 <assert_contents>
110 <has_n_columns n="2"/>
111 <has_n_lines n="6"/>
112 <has_size value="70" delta="50"/>
113 </assert_contents>
114 </output>
115 <output name="model1"> <!-- all five ranked models are checked against the same shape expectations -->
116 <assert_contents>
117 <has_n_columns n="12"/>
118 <has_n_lines n="1517"/>
119 <has_size value="123000" delta="10000"/>
120 </assert_contents>
121 </output>
122 <output name="model2">
123 <assert_contents>
124 <has_n_columns n="12"/>
125 <has_n_lines n="1517"/>
126 <has_size value="123000" delta="10000"/>
127 </assert_contents>
128 </output>
129 <output name="model3">
130 <assert_contents>
131 <has_n_columns n="12"/>
132 <has_n_lines n="1517"/>
133 <has_size value="123000" delta="10000"/>
134 </assert_contents>
135 </output>
136 <output name="model4">
137 <assert_contents>
138 <has_n_columns n="12"/>
139 <has_n_lines n="1517"/>
140 <has_size value="123000" delta="10000"/>
141 </assert_contents>
142 </output>
143 <output name="model5">
144 <assert_contents>
145 <has_n_columns n="12"/>
146 <has_n_lines n="1517"/>
147 <has_size value="123000" delta="10000"/>
148 </assert_contents>
149 </output>
150 </test>
151 </tests>
152 <help><![CDATA[
153
154 .. class:: infomark
155
156 **What it does**
157
158 | AlphaFold v2.0: AI-guided 3D structure prediction of proteins
159 |
160
161 *What is AlphaFold?*
162
163 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence (in Fasta format), then will 'fold' that sequence into a 3D model.
164 | NOTE: AlphaFold has a number of versions - this tool uses AlphaFold v2.0.
165 |
166
167 *What makes AlphaFold different?*
168
169 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally.
170 | In-silico protein folding has been an active field of research for decades, but existing tools ran more slowly and with less reliability than AlphaFold.
171 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known.
172 |
173
174 *Downstream analysis*
175
176 | Obtaining a protein fold is the first step in many analyses.
177 | The 3D models created by AlphaFold can be used in downstream analysis, including the following:
178 |
179
180 - Inspecting protein features
181 3D viewers (pymol, chimera, ngl, blender) can be used to inspect active sites, regulatory domains, binding sites.
182 - Molecular docking
183 3D structures can be used to predict the binding affinity of different compounds.
184 This is especially useful in screening drug candidates.
185 - Protein-protein interactions
186 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation.
187 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary software includes `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available, such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_.
188
189 *Expected run times*
190
191 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold_runtime_graph.png?raw=true
192 :height: 520
193 :alt: Run time graph
194
195 |
196 | In general, we observe a quadratic relationship between sequence length and time to fold.
197 | Once your job begins, a sequence of 50aa will take approximately 1hr to complete, while a sequence of 2000aa will take about 18hrs.
198 |
199
200 **Input**
201
202 *Amino acid sequence*
203
204 | AlphaFold accepts a **single amino acid sequence** in FASTA format.
205 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box.
206 | Please paste only a single sequence - we can only process a single sequence per job.
207 | Multiple sequences will return an error.
208 |
209
210 **Outputs**
211
212 *Visualization*
213
214 | An interactive 3D graphic of the best predicted molecular structures.
215 | This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from.
216 | Open the "Visualization" history output by clicking on the "view data" icon:
217 |
218
219 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true
220 :height: 520
221 :alt: Result visualization
222
223 |
224
225 *PDB files*
226
227 | Five PDB (Protein Data Bank) files will be created for the best ranking models predicted by AlphaFold.
228 | These files describe the molecular structures and can be used for downstream analysis, e.g. *in silico* molecular docking.
229 |
230
231 *Model confidence scores*
232
233 | This output is a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_) which may be useful for downstream analysis.
234 | Model confidence scores are also included as a column in the default PDB output. Per-residue scores can optionally be written to a separate file by enabling "Output per-residue confidence scores".
235 |
236
237 **External Resources**
238
239 We recommend checking out the
240 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_,
241 which contains predicted structures for thousands of human proteins. See also:
242
243 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_
244 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_
245
246 ]]></help>
247 <citations> <!-- AlphaFold paper; a doi citation takes the bare DOI, because the resolver URL is added when the citation is looked up -->
248 <citation type="doi">10.1038/s41586-021-03819-2</citation>
249 </citations>
250 </tool>