comparison constava.xml @ 0:2ed0df0360e5 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/constava commit 77814d75404602f3fb6b791dd79a17653de22d45
author iuc
date Wed, 08 Oct 2025 20:13:34 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2ed0df0360e5
1 <tool id="constava" name="Constava" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="GPL-3.0-only">
2 <description>
3 calculates conformational-state probabilities and variability in structural ensembles
4 </description>
5 <macros>
6 <import>macros.xml</import> <!-- presumably defines the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens and the "requirements" macro used in this file; confirm in macros.xml -->
7 </macros>
8 <edam_topics>
9 <edam_topic>topic_0130</edam_topic>
10 </edam_topics>
11 <edam_operations>
12 <edam_operation>operation_0249</edam_operation>
13 </edam_operations>
14 <xrefs>
15 <xref type="bio.tools">constava</xref>
16 <!-- bio.tools entry for the xref above: https://bio.tools/constava -->
17 </xrefs>
18 <expand macro="requirements"/>
19 <version_command>constava --version</version_command>
20 <command detect_errors="aggressive" strict="true">
21 <![CDATA[
22 echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Starting Constava Tool execution from Galaxy Platform" &&
23 #for $input_file_id, $input_file in enumerate( $input_options.input_files ):
24 #if $input_file
25 #set ref_name = str($input_file.element_identifier)
26 ln -sv '${input_file}' '$ref_name' &&
27 #end if
28 #end for
29 ## The loop above links every input into the working directory under its element identifier, so the names passed to 'constava analyze' below match.
30 #set angle_units = $input_options.input_degrees
31 echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Constava will use '$angle_units' as angle units (empty means Radians)" &&
32 ## Optional step: fit a custom conformational-state model. Every parameter is read through its fully-qualified section/conditional path.
33 #if str( $conformational_state_model_options.custom_model.use_custom_model ) == "true":
34 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Using custom model" &&
35
36 #if str( $conformational_state_model_options.custom_model.custom_input_json.use_custom_input_file ) == "true":
37 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Using custom input file for data training: $conformational_state_model_options.custom_model.custom_input_json.custom_input_file.element_identifier" &&
38 ## Build the '--input <file> [--degrees]' argument for fit-model; str() keeps the concatenation below on plain strings.
39 #set custom_input_file_angle_units = str( $conformational_state_model_options.custom_model.custom_input_json.custom_input_file_degrees )
40 #set ref_name_train_data = str($conformational_state_model_options.custom_model.custom_input_json.custom_input_file.element_identifier)
41 #if $custom_input_file_angle_units != "":
42 #set custom_input_parameter="--input " + $ref_name_train_data + " " + $custom_input_file_angle_units
43 #else:
44 #set custom_input_parameter="--input " + $ref_name_train_data
45 #end if
46
47 ln -sv '${conformational_state_model_options.custom_model.custom_input_json.custom_input_file}' '$ref_name_train_data' &&
48 #else:
49 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Not using custom input file for data training" &&
50 #set custom_input_parameter=""
51 #end if
52 ## Fit the requested model type; the resulting pickle is handed to 'constava analyze' through load_model_param.
53 #if str( $conformational_state_model_options.custom_model.model_type_options.model_type ) == "kde":
54 #set model_file = "custom_model.kde.pkl"
55 #set load_model_param="--load-model custom_model.kde.pkl"
56 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Invoking the 'constava fit-model' module command for KDE" &&
57
58 constava fit-model -vv
59 --model-type kde
60 --kde-bandwidth $conformational_state_model_options.custom_model.model_type_options.bandwidth
61 --output $model_file
62 #if str( $custom_input_parameter ) != "":
63 $custom_input_parameter
64 #end if
65 &&
66 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] The 'constava fit-model' module command for KDE has finished" &&
67 #else if str( $conformational_state_model_options.custom_model.model_type_options.model_type ) == "grid":
68 #set model_file = "custom_model.grid.pkl"
69 #set load_model_param="--load-model custom_model.grid.pkl"
70 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Preparing the 'constava fit-model' module command for GRID" &&
71
72 constava fit-model -vv
73 --model-type grid
74 --grid-points $conformational_state_model_options.custom_model.model_type_options.custom_model_grid_points
75 --kde-bandwidth $conformational_state_model_options.custom_model.model_type_options.bandwidth
76 --output $model_file
77 #if str( $custom_input_parameter ) != "":
78 $custom_input_parameter
79 #end if
80 &&
81 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] The 'constava fit-model' module command for GRID has finished" &&
82 #else:
83 #set load_model_param=""
84 #end if
85
86 #else:
87 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Not using custom model" &&
88 #set load_model_param=""
89 #end if
90 ## Main step: analyse all linked inputs with the selected subsampling scheme.
91 echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Invoking the 'constava analyze' module command" &&
92 ## input_format is taken from the last input in the loop, so all inputs are expected to share one datatype.
93 constava analyze -vv --precision $input_options.input_precision --input
94 #for $input_file_id, $input_file in enumerate( $input_options.input_files ):
95 #if $input_file:
96 #set ref_name = str($input_file.element_identifier)
97 #set input_format = str($input_file.ext)
98 $ref_name
99 #end if
100 #end for
101 --input-format $input_format
102 --output output_constava.csv
103 #if str( $subsampling_options.sampling_options.subsampling_type ) == "window":
104 #if str( $subsampling_options.sampling_options.return_window_series ) == "true":
105 --window-series $subsampling_options.sampling_options.window_size
106 #else:
107 --window $subsampling_options.sampling_options.window_size
108 #end if
109 #else if str( $subsampling_options.sampling_options.subsampling_type ) == "bootstrap":
110 #if str( $subsampling_options.sampling_options.return_bootstrap_series ) == "true":
111 --bootstrap-series $subsampling_options.sampling_options.bootstrap_size
112 #else:
113 --bootstrap $subsampling_options.sampling_options.bootstrap_size
114 #end if
115 --bootstrap-samples $subsampling_options.sampling_options.bootstrap_samples --seed $subsampling_options.sampling_options.bootstrap_seed
116 #end if
117 #if str( $load_model_param ) != "":
118 $load_model_param
119 #end if
120 #if str( $angle_units ) != "":
121 $angle_units
122 #end if
123 &&
124 echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Constava execution from Galaxy has finished"
125 ]]></command>
126 <environment_variables> <!-- Set for the job; presumably these keep matplotlib on a headless backend and silence tqdm progress bars (confirm against those libraries' documentation) -->
127 <environment_variable name="MPLBACKEND">Agg</environment_variable>
128 <environment_variable name="TQDM_DISABLE">1</environment_variable>
129 <environment_variable name="PYTHON_TQDM_DISABLE">1</environment_variable>
130 </environment_variables>
131 <inputs> <!-- Three sections: dihedral-angle inputs, optional custom conformational-state model, and subsampling scheme -->
132 <section name="input_options" title="Input Options" expanded="true" help="As input data the backbone dihedral angles extracted from the conformational ensemble need to be provided.">
133 <param name="input_files" type="data" format="csv,xvg" label="Dihedral angles file" help="Upload the input file(s) that contain the dihedral angles in CSV or XVG format (GROMACS' `gmx chi` module). Important: Given Constava extracts RESNAME and RESINDEX from filenames when using XVG format, your files must follow this regex 'ramaPhiPsi([A-Z][A-Z0-9][A-Z0-9])([0-9]+).xvg'" multiple="true" argument="input"/>
134 <param name="input_degrees" type="select" label="Are the dihedral angles in that file in radians or degrees?" help="Indicate if the dihedral angles are in radians or degrees." argument="degrees">
135 <option value="--degrees">Degrees</option>
136 <option value="" selected="true">Radians</option>
137 </param>
138 <param name="input_precision" type="integer" label="Decimal precision" help="Sets the number of decimals in the output files." value="3" min="1" max="16" argument="precision"/>
139 </section>
140 <section name="conformational_state_model_options" title="Kernel Options" help="By default, the conformational state models are generated on-the-fly when running Constava. In selected cases generating a model beforehand and loading it can be useful, though.">
141 <conditional name="custom_model">
142 <param name="use_custom_model" type="select" label="Do you want to train a custom probabilistic model of conformational states?">
143 <option value="false" selected="true">No</option>
144 <option value="true">Yes</option>
145 </param>
146 <when value="true"> <!-- Yes, train a custom probabilistic model of conformational states -->
147 <conditional name="model_type_options">
148 <param name="model_type" type="select" label="Select a model type" help="We provide two model types. KDE models are the default. They are fast to fit but may be slow in the inference in large conformational ensembles (e.g. long-timescale MD simulations). The idea of Grid models is, to replace the continuous probability density function of the kde-Model by a fixed set of grid-points. The PDF for any sample is then estimated by linear interpolation between the nearest grid points. This is slightly less accurate than the kde-Model but speeds up inference significantly.">
149 <option value="kde">KDE model</option>
150 <option value="grid">Grid model</option>
151 </param>
152 <when value="kde">
153 <param name="bandwidth" type="float" label="Which bandwidth do you want to use?" value="0.13" help="This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13)" argument="kde-bandwidth"/>
154 </when>
155 <when value="grid">
156 <param name="bandwidth" type="float" label="Which bandwidth do you want to use?" value="0.13" help="This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13)" argument="kde-bandwidth"/>
157 <param name="custom_model_grid_points" type="integer" label="Grid points" value="10000" help="This flag controls how many grid points are used to describe the probability density function. (default: 10000)" argument="grid-points"/>
158 </when>
159 </conditional>
160 <conditional name="custom_input_json">
161 <param name="use_custom_input_file" type="select" label="Do you want to train probability density functions with custom data?" help="If not, the default data from the publication will be used.">
162 <option value="false" selected="true">No</option>
163 <option value="true">Yes</option>
164 </param>
165 <when value="true">
166 <param name="custom_input_file" type="data" format="json" label="File for pdf fitting in JSON format" help="The data to which the new conformational state models will be fitted. It should be provided as a JSON file. The top-most key should indicate the names of the conformational states. On the level below, lists of phi-psi pairs for each state should be provided." argument="input"/>
167 <param name="custom_input_file_degrees" type="select" label="Are the dihedral angles in that file in radians or degrees?" help="Indicate if the dihedral angles of the training data file are in radians or degrees." argument="degrees">
168 <option value="--degrees">Degrees</option>
169 <option value="" selected="true">Radians</option>
170 </param>
171 </when>
172 <when value="false"/>
173 </conditional>
174 </when>
175 <when value="false"/>
176 </conditional>
177 </section>
178 <section name="subsampling_options" title="Subsampling Options" help="Do inference using either a moving reading-frame of consecutive samples (sliding window) or samples obtained through bootstrapping (bootstrap).">
179 <conditional name="sampling_options">
180 <param name="subsampling_type" type="select" label="Select a subsampling method to configure" help="You must select and configure at least one subsampling option.">
181 <option value="window" selected="true">Sliding window</option>
182 <option value="bootstrap">Bootstrap sampling</option>
183 </param>
184 <when value="window">
185 <param name="window_size" type="text" label="Window size (space-separated integers)" value="3" help="Specify window sizes for moving frame analysis, e.g., '3 5 7'. Each reading frame consists of consecutive samples. Multiple values can be provided." argument="window">
186 <validator type="regex" message="Use one or more integers separated by single spaces.">^\d+( \d+)*$</validator> <!-- integers joined by exactly one space; no nested quantifiers, so matching stays linear -->
187 </param>
188 <param name="return_window_series" type="boolean" label="Return the results for every window rather than the average." help="Return the results for every window rather than the average. This can result in very large output files." value="false" argument="window-series"/>
189 </when>
190 <when value="bootstrap">
191 <param name="bootstrap_size" type="text" label="Bootstrap size (space-separated integers)" value="3" help="Do inference using N samples obtained through bootstrapping. Specify one or more bootstrap sizes, e.g., '10 20 30'." argument="bootstrap">
192 <validator type="regex" message="Use one or more integers separated by single spaces.">^\d+( \d+)*$</validator> <!-- same pattern as the window size validator -->
193 </param>
194 <param name="return_bootstrap_series" type="boolean" label="Return bootstrap series calculation" help="Return the results for every subsample rather than the average. This can result in very large output files." value="false" argument="bootstrap-series"/>
195 <param name="bootstrap_samples" type="integer" label="Bootstrap samples" value="10000" min="1" help="When bootstrapping, sample this many times from the input data." argument="bootstrap-samples"/>
196 <param name="bootstrap_seed" type="integer" label="Bootstrap seed" value="42" min="1" help="Set random seed for bootstrap sampling." argument="seed"/>
197 </when>
198 </conditional>
199 </section>
200 </inputs>
201 <outputs> <!-- Single CSV output, collected from output_constava.csv, the file name the command block passes to 'constava analyze' as its output -->
202 <data name="file_contents" format="csv" label="${tool.name} on ${on_string}: File Contents" from_work_dir="output_constava.csv"/>
203 </outputs>
204 <tests>
205 <!-- ======================================================================= -->
206 <!-- Test scenarios: Default PDF -->
207 <!-- ======================================================================= -->
208
209 <!-- test_001: CSV dihedrals in radians, precision 10, default subsampling (sliding window of size 3), default PDF; no fit-model step expected -->
210 <test expect_num_outputs="1">
211 <section name="input_options">
212 <param name="input_files" value="csv/dihedrals.mini.csv"/>
213 <param name="input_degrees" value=""/>
214 <param name="input_precision" value="10"/>
215 </section>
216 <assert_command>
217 <has_text text="constava analyze -vv --precision 10 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3"/>
218 <not_has_text text="constava fit-model"/>
219 </assert_command>
220 <output name="file_contents" file="expected/test_001.csv" ftype="csv"/>
221 </test>
222
223 <!-- test_002: 51 XVG dihedral files in degrees, precision 5, default subsampling (sliding window of size 3), default PDF; no fit-model step expected -->
224 <test expect_num_outputs="1">
225 <section name="input_options">
226 <param name="input_files" value="xvg/ramaPhiPsiALA18.xvg,xvg/ramaPhiPsiGLY11.xvg,xvg/ramaPhiPsiLEU45.xvg,xvg/ramaPhiPsiTHR2.xvg,xvg/ramaPhiPsiALA19.xvg,xvg/ramaPhiPsiGLY22.xvg,xvg/ramaPhiPsiLYS16.xvg,xvg/ramaPhiPsiTHR4.xvg,xvg/ramaPhiPsiALA26.xvg,xvg/ramaPhiPsiGLY33.xvg,xvg/ramaPhiPsiLYS44.xvg,xvg/ramaPhiPsiTHR7.xvg,xvg/ramaPhiPsiARG29.xvg,xvg/ramaPhiPsiGLY36.xvg,xvg/ramaPhiPsiLYS48.xvg,xvg/ramaPhiPsiTRP31.xvg,xvg/ramaPhiPsiASN15.xvg,xvg/ramaPhiPsiGLY42.xvg,xvg/ramaPhiPsiLYS49.xvg,xvg/ramaPhiPsiTYR21.xvg,xvg/ramaPhiPsiASN27.xvg,xvg/ramaPhiPsiGLY50.xvg,xvg/ramaPhiPsiLYS9.xvg,xvg/ramaPhiPsiTYR5.xvg,xvg/ramaPhiPsiASN3.xvg,xvg/ramaPhiPsiILE17.xvg,xvg/ramaPhiPsiPHE40.xvg,xvg/ramaPhiPsiTYR6.xvg,xvg/ramaPhiPsiASN32.xvg,xvg/ramaPhiPsiILE34.xvg,xvg/ramaPhiPsiSER10.xvg,xvg/ramaPhiPsiVAL23.xvg,xvg/ramaPhiPsiASP12.xvg,xvg/ramaPhiPsiILE39.xvg,xvg/ramaPhiPsiSER24.xvg,xvg/ramaPhiPsiVAL25.xvg,xvg/ramaPhiPsiASP37.xvg,xvg/ramaPhiPsiILE46.xvg,xvg/ramaPhiPsiSER30.xvg,xvg/ramaPhiPsiVAL41.xvg,xvg/ramaPhiPsiGLN20.xvg,xvg/ramaPhiPsiLEU14.xvg,xvg/ramaPhiPsiSER35.xvg,xvg/ramaPhiPsiVAL47.xvg,xvg/ramaPhiPsiGLN43.xvg,xvg/ramaPhiPsiLEU28.xvg,xvg/ramaPhiPsiSER51.xvg,xvg/ramaPhiPsiVAL8.xvg,xvg/ramaPhiPsiGLY1.xvg,xvg/ramaPhiPsiLEU38.xvg,xvg/ramaPhiPsiTHR13.xvg" ftype="xvg"/>
227 <param name="input_degrees" value="--degrees"/>
228 <param name="input_precision" value="5"/>
229 </section>
230 <assert_command>
231 <has_text text="constava analyze -vv --precision 5 --input"/>
232 <has_text text="ramaPhiPsiALA18.xvg"/>
233 <has_text text="ramaPhiPsiGLY11.xvg"/>
234 <has_text text="ramaPhiPsiLEU45.xvg"/>
235 <has_text text="ramaPhiPsiTHR2.xvg"/>
236 <has_text text="ramaPhiPsiALA19.xvg"/>
237 <has_text text="ramaPhiPsiGLY22.xvg"/>
238 <has_text text="ramaPhiPsiLYS16.xvg"/>
239 <has_text text="ramaPhiPsiTHR4.xvg"/>
240 <has_text text="ramaPhiPsiALA26.xvg"/>
241 <has_text text="ramaPhiPsiGLY33.xvg"/>
242 <has_text text="ramaPhiPsiLYS44.xvg"/>
243 <has_text text="ramaPhiPsiTHR7.xvg"/>
244 <has_text text="ramaPhiPsiARG29.xvg"/>
245 <has_text text="ramaPhiPsiGLY36.xvg"/>
246 <has_text text="ramaPhiPsiLYS48.xvg"/>
247 <has_text text="ramaPhiPsiTRP31.xvg"/>
248 <has_text text="ramaPhiPsiASN15.xvg"/>
249 <has_text text="ramaPhiPsiGLY42.xvg"/>
250 <has_text text="ramaPhiPsiLYS49.xvg"/>
251 <has_text text="ramaPhiPsiTYR21.xvg"/>
252 <has_text text="ramaPhiPsiASN27.xvg"/>
253 <has_text text="ramaPhiPsiGLY50.xvg"/>
254 <has_text text="ramaPhiPsiLYS9.xvg"/>
255 <has_text text="ramaPhiPsiTYR5.xvg"/>
256 <has_text text="ramaPhiPsiASN3.xvg"/>
257 <has_text text="ramaPhiPsiILE17.xvg"/>
258 <has_text text="ramaPhiPsiPHE40.xvg"/>
259 <has_text text="ramaPhiPsiTYR6.xvg"/>
260 <has_text text="ramaPhiPsiASN32.xvg"/>
261 <has_text text="ramaPhiPsiILE34.xvg"/>
262 <has_text text="ramaPhiPsiSER10.xvg"/>
263 <has_text text="ramaPhiPsiVAL23.xvg"/>
264 <has_text text="ramaPhiPsiASP12.xvg"/>
265 <has_text text="ramaPhiPsiILE39.xvg"/>
266 <has_text text="ramaPhiPsiSER24.xvg"/>
267 <has_text text="ramaPhiPsiVAL25.xvg"/>
268 <has_text text="ramaPhiPsiASP37.xvg"/>
269 <has_text text="ramaPhiPsiILE46.xvg"/>
270 <has_text text="ramaPhiPsiSER30.xvg"/>
271 <has_text text="ramaPhiPsiVAL41.xvg"/>
272 <has_text text="ramaPhiPsiGLN20.xvg"/>
273 <has_text text="ramaPhiPsiLEU14.xvg"/>
274 <has_text text="ramaPhiPsiSER35.xvg"/>
275 <has_text text="ramaPhiPsiVAL47.xvg"/>
276 <has_text text="ramaPhiPsiGLN43.xvg"/>
277 <has_text text="ramaPhiPsiLEU28.xvg"/>
278 <has_text text="ramaPhiPsiSER51.xvg"/>
279 <has_text text="ramaPhiPsiVAL8.xvg"/>
280 <has_text text="ramaPhiPsiGLY1.xvg"/>
281 <has_text text="ramaPhiPsiLEU38.xvg"/>
282 <has_text text="ramaPhiPsiTHR13.xvg"/>
283 <has_text text="--input-format xvg --output output_constava.csv --window 3 --degrees"/>
284 <not_has_text text="constava fit-model"/>
285 </assert_command>
286 <output name="file_contents" file="expected/test_002.csv" ftype="csv"/>
287 </test>
288
289 <!-- ======================================================================= -->
290 <!-- Test scenarios: Custom PDF with default training data -->
291 <!-- ======================================================================= -->
292
293 <!-- test_003: custom PDF, KDE model (bandwidth 0.15) fitted on the default training data; CSV input in radians, sliding window of size 3 -->
294 <test expect_num_outputs="1">
295 <section name="input_options">
296 <param name="input_files" value="csv/dihedrals.mini.csv"/>
297 <param name="input_degrees" value=""/>
298 <param name="input_precision" value="5"/>
299 </section>
300 <section name="conformational_state_model_options">
301 <conditional name="custom_model">
302 <param name="use_custom_model" value="true"/>
303 <conditional name="model_type_options">
304 <param name="model_type" value="kde"/>
305 <param name="bandwidth" value="0.15"/>
306 </conditional>
307 </conditional>
308 </section>
309 <assert_command>
310 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
311 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.kde.pkl"/>
312 </assert_command>
313 <output name="file_contents" file="expected/test_003.csv" ftype="csv"/>
314 </test>
315
316 <!-- test_004: custom PDF, grid model (1500 grid points, bandwidth 0.15) fitted on the default training data; CSV input in radians, sliding window of size 3 -->
317 <test expect_num_outputs="1">
318 <section name="input_options">
319 <param name="input_files" value="csv/dihedrals.mini.csv"/>
320 <param name="input_degrees" value=""/>
321 <param name="input_precision" value="5"/>
322 </section>
323 <section name="conformational_state_model_options">
324 <conditional name="custom_model">
325 <param name="use_custom_model" value="true"/>
326 <conditional name="model_type_options">
327 <param name="model_type" value="grid"/>
328 <param name="bandwidth" value="0.15"/>
329 <param name="custom_model_grid_points" value="1500"/>
330 </conditional>
331 </conditional>
332 </section>
333 <assert_command>
334 <has_text text="constava fit-model -vv --model-type grid --grid-points 1500 --kde-bandwidth 0.15 --output custom_model.grid.pkl"/>
335 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.grid.pkl"/>
336 </assert_command>
337 <output name="file_contents" file="expected/test_004.csv" ftype="csv"/>
338 </test>
339
340 <!-- ======================================================================= -->
341 <!-- Test scenarios: Custom PDF with custom training data in JSON -->
342 <!-- ======================================================================= -->
343
344 <!-- test_005: custom PDF, KDE model (bandwidth 0.15) fitted on custom JSON training data; CSV input in radians, sliding window of size 3 -->
345 <test expect_num_outputs="1">
346 <section name="input_options">
347 <param name="input_files" value="csv/dihedrals.mini.csv"/>
348 <param name="input_degrees" value=""/>
349 <param name="input_precision" value="5"/>
350 </section>
351 <section name="conformational_state_model_options">
352 <conditional name="custom_model">
353 <param name="use_custom_model" value="true"/>
354 <conditional name="model_type_options">
355 <param name="model_type" value="kde"/>
356 <param name="bandwidth" value="0.15"/>
357 </conditional>
358 <conditional name="custom_input_json">
359 <param name="use_custom_input_file" value="true"/>
360 <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
361 </conditional>
362 </conditional>
363 </section>
364 <assert_command>
365 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/>
366 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.kde.pkl"/>
367 </assert_command>
368 <output name="file_contents" file="expected/test_005.csv" ftype="csv"/>
369 </test>
370
371 <!-- test_006: custom PDF, grid model (1500 grid points, bandwidth 0.15) fitted on custom JSON training data; CSV input in radians, sliding window of size 3 -->
372 <test expect_num_outputs="1">
373 <section name="input_options">
374 <param name="input_files" value="csv/dihedrals.mini.csv"/>
375 <param name="input_degrees" value=""/>
376 <param name="input_precision" value="5"/>
377 </section>
378 <section name="conformational_state_model_options">
379 <conditional name="custom_model">
380 <param name="use_custom_model" value="true"/>
381 <conditional name="model_type_options">
382 <param name="model_type" value="grid"/>
383 <param name="bandwidth" value="0.15"/>
384 <param name="custom_model_grid_points" value="1500"/>
385 </conditional>
386 <conditional name="custom_input_json">
387 <param name="use_custom_input_file" value="true"/>
388 <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
389 </conditional>
390 </conditional>
391 </section>
392 <assert_command>
393 <has_text text="constava fit-model -vv --model-type grid --grid-points 1500 --kde-bandwidth 0.15 --output custom_model.grid.pkl --input constava_csdata.mini.json"/>
394 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.grid.pkl"/>
395 </assert_command>
396 <output name="file_contents" file="expected/test_006.csv" ftype="csv"/>
397 </test>
398 <!-- ======================================================================= -->
399 <!-- Test scenarios: Default PDF with bootstrap -->
400 <!-- ======================================================================= -->
401
402 <!-- test_007: CSV dihedrals in radians, averaged bootstrap (size 3, 10 samples, seed 89), default PDF; no fit-model step expected -->
403 <test expect_num_outputs="1">
404 <section name="input_options">
405 <param name="input_files" value="csv/dihedrals.mini.csv"/>
406 <param name="input_degrees" value=""/>
407 <param name="input_precision" value="5"/>
408 </section>
409 <section name="conformational_state_model_options">
410 <conditional name="custom_model">
411 <param name="use_custom_model" value="false"/>
412 </conditional>
413 </section>
414 <section name="subsampling_options">
415 <conditional name="sampling_options">
416 <param name="subsampling_type" value="bootstrap"/>
417 <param name="bootstrap_size" value="3"/>
418 <param name="return_bootstrap_series" value="false"/>
419 <param name="bootstrap_samples" value="10"/>
420 <param name="bootstrap_seed" value="89"/>
421 </conditional>
422 </section>
423 <assert_command>
424 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89"/>
425 <not_has_text text="constava fit-model"/>
426 </assert_command>
427 <output name="file_contents" file="expected/test_007.csv" ftype="csv"/>
428 </test>
429
430 <!-- test_008: CSV dihedrals in radians, bootstrap series (size 3, 10 samples, seed 89), default PDF; no fit-model step expected -->
431 <test expect_num_outputs="1">
432 <section name="input_options">
433 <param name="input_files" value="csv/dihedrals.mini.csv"/>
434 <param name="input_degrees" value=""/>
435 <param name="input_precision" value="5"/>
436 </section>
437 <section name="conformational_state_model_options">
438 <conditional name="custom_model">
439 <param name="use_custom_model" value="false"/>
440 </conditional>
441 </section>
442 <section name="subsampling_options">
443 <conditional name="sampling_options">
444 <param name="subsampling_type" value="bootstrap"/>
445 <param name="bootstrap_size" value="3"/>
446 <param name="return_bootstrap_series" value="true"/>
447 <param name="bootstrap_samples" value="10"/>
448 <param name="bootstrap_seed" value="89"/>
449 </conditional>
450 </section>
451 <assert_command>
452 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89"/>
453 <not_has_text text="constava fit-model"/>
454 </assert_command>
455 <output name="file_contents" file="expected/test_008.csv" ftype="csv"/>
456 </test>
457
458 <!-- ======================================================================= -->
459 <!-- Test scenarios: Custom PDF with bootstrap -->
460 <!-- ======================================================================= -->
461
462 <!-- test_009: CSV dihedrals in radians, averaged bootstrap (size 3, 10 samples, seed 89) with a custom KDE model (bandwidth 0.15) fitted on the default training data -->
463 <test expect_num_outputs="1">
464 <section name="input_options">
465 <param name="input_files" value="csv/dihedrals.mini.csv"/>
466 <param name="input_degrees" value=""/>
467 <param name="input_precision" value="5"/>
468 </section>
469 <section name="conformational_state_model_options">
470 <conditional name="custom_model">
471 <param name="use_custom_model" value="true"/>
472 <conditional name="model_type_options">
473 <param name="model_type" value="kde"/>
474 <param name="bandwidth" value="0.15"/>
475 </conditional>
476 </conditional>
477 </section>
478 <section name="subsampling_options">
479 <conditional name="sampling_options">
480 <param name="subsampling_type" value="bootstrap"/>
481 <param name="bootstrap_size" value="3"/>
482 <param name="return_bootstrap_series" value="false"/>
483 <param name="bootstrap_samples" value="10"/>
484 <param name="bootstrap_seed" value="89"/>
485 </conditional>
486 </section>
487 <assert_command>
488 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
489 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
490 </assert_command>
491 <output name="file_contents" file="expected/test_009.csv" ftype="csv"/>
492 </test>
493
494 <!-- test_010: CSV dihedrals in radians, averaged bootstrap (size 3, 10 samples, seed 89) with a custom KDE model (bandwidth 0.15) fitted on custom JSON training data -->
495 <test expect_num_outputs="1">
496 <section name="input_options">
497 <param name="input_files" value="csv/dihedrals.mini.csv"/>
498 <param name="input_degrees" value=""/>
499 <param name="input_precision" value="5"/>
500 </section>
501 <section name="conformational_state_model_options">
502 <conditional name="custom_model">
503 <param name="use_custom_model" value="true"/>
504 <conditional name="model_type_options">
505 <param name="model_type" value="kde"/>
506 <param name="bandwidth" value="0.15"/>
507 </conditional>
508 <conditional name="custom_input_json">
509 <param name="use_custom_input_file" value="true"/>
510 <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
511 </conditional>
512 </conditional>
513 </section>
514 <section name="subsampling_options">
515 <conditional name="sampling_options">
516 <param name="subsampling_type" value="bootstrap"/>
517 <param name="bootstrap_size" value="3"/>
518 <param name="return_bootstrap_series" value="false"/>
519 <param name="bootstrap_samples" value="10"/>
520 <param name="bootstrap_seed" value="89"/>
521 </conditional>
522 </section>
523 <assert_command>
524 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/>
525 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
526 </assert_command>
527 <output name="file_contents" file="expected/test_010.csv" ftype="csv"/>
528 </test>
529
530 <!-- test_011: CSV dihedrals in radians, bootstrap series (size 3, 10 samples, seed 89) with a custom KDE model (bandwidth 0.15) fitted on the default training data -->
531 <test expect_num_outputs="1">
532 <section name="input_options">
533 <param name="input_files" value="csv/dihedrals.mini.csv"/>
534 <param name="input_degrees" value=""/>
535 <param name="input_precision" value="5"/>
536 </section>
537 <section name="conformational_state_model_options">
538 <conditional name="custom_model">
539 <param name="use_custom_model" value="true"/>
540 <conditional name="model_type_options">
541 <param name="model_type" value="kde"/>
542 <param name="bandwidth" value="0.15"/>
543 </conditional>
544 </conditional>
545 </section>
546 <section name="subsampling_options">
547 <conditional name="sampling_options">
548 <param name="subsampling_type" value="bootstrap"/>
549 <param name="bootstrap_size" value="3"/>
550 <param name="return_bootstrap_series" value="true"/>
551 <param name="bootstrap_samples" value="10"/>
552 <param name="bootstrap_seed" value="89"/>
553 </conditional>
554 </section>
555 <assert_command>
556 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
557 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
558 </assert_command>
559 <output name="file_contents" file="expected/test_011.csv" ftype="csv"/>
560 </test>
561
562 <!-- test_012: CSV dihedrals in radians, bootstrap series (size 3, 10 samples, seed 89) with a custom KDE model (bandwidth 0.15) fitted on custom JSON training data -->
563 <test expect_num_outputs="1">
564 <section name="input_options">
565 <param name="input_files" value="csv/dihedrals.mini.csv"/>
566 <param name="input_degrees" value=""/>
567 <param name="input_precision" value="5"/>
568 </section>
569 <section name="conformational_state_model_options">
570 <conditional name="custom_model">
571 <param name="use_custom_model" value="true"/>
572 <conditional name="model_type_options">
573 <param name="model_type" value="kde"/>
574 <param name="bandwidth" value="0.15"/>
575 </conditional>
576 <conditional name="custom_input_json">
577 <param name="use_custom_input_file" value="true"/>
578 <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
579 </conditional>
580 </conditional>
581 </section>
582 <section name="subsampling_options">
583 <conditional name="sampling_options">
584 <param name="subsampling_type" value="bootstrap"/>
585 <param name="bootstrap_size" value="3"/>
586 <param name="return_bootstrap_series" value="true"/>
587 <param name="bootstrap_samples" value="10"/>
588 <param name="bootstrap_seed" value="89"/>
589 </conditional>
590 </section>
591 <assert_command>
592 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/>
593 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
594 </assert_command>
595 <output name="file_contents" file="expected/test_012.csv" ftype="csv"/>
596 </test>
597
598 <!-- test_013: bootstrap validators; two bootstrap sizes ("10 20") passed as one space-separated value, bootstrap series disabled, custom KDE model with no custom training file supplied -->
599 <test expect_num_outputs="1">
600 <section name="input_options">
601 <param name="input_files" value="csv/dihedrals.mini.csv"/>
602 <param name="input_degrees" value=""/> <!-- empty value: angles are treated as radians -->
603 <param name="input_precision" value="5"/>
604 </section>
605 <section name="conformational_state_model_options">
606 <conditional name="custom_model">
607 <param name="use_custom_model" value="true"/>
608 <conditional name="model_type_options">
609 <param name="model_type" value="kde"/>
610 <param name="bandwidth" value="0.15"/>
611 </conditional>
612 </conditional>
613 </section>
614 <section name="subsampling_options">
615 <conditional name="sampling_options">
616 <param name="subsampling_type" value="bootstrap"/>
617 <param name="bootstrap_size" value="10 20"/>
618 <param name="return_bootstrap_series" value="false"/>
619 <param name="bootstrap_samples" value="10"/>
620 <param name="bootstrap_seed" value="18"/>
621 </conditional>
622 </section>
623 <assert_command> <!-- both sizes must reach the analyze call as separate values of the plain bootstrap option -->
624 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
625 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 10 20 --bootstrap-samples 10 --seed 18 --load-model custom_model.kde.pkl"/>
626 </assert_command>
627 <output name="file_contents" file="expected/test_013.csv" ftype="csv"/>
628 </test>
629 <!-- test_014: window validators; three window sizes ("3 5 7") passed as one space-separated value, window series not requested, custom KDE model -->
630 <test expect_num_outputs="1">
631 <section name="input_options">
632 <param name="input_files" value="csv/dihedrals.mini.csv"/>
633 <param name="input_degrees" value=""/> <!-- empty value: angles are treated as radians -->
634 <param name="input_precision" value="5"/>
635 </section>
636 <section name="conformational_state_model_options">
637 <conditional name="custom_model">
638 <param name="use_custom_model" value="true"/>
639 <conditional name="model_type_options">
640 <param name="model_type" value="kde"/>
641 <param name="bandwidth" value="0.15"/>
642 </conditional>
643 </conditional>
644 </section>
645 <section name="subsampling_options">
646 <conditional name="sampling_options">
647 <param name="subsampling_type" value="window"/>
648 <param name="window_size" value="3 5 7"/>
649 </conditional>
650 </section>
651 <assert_command> <!-- all three sizes must reach the analyze call as values of the plain window option -->
652 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
653 <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 5 7 --load-model custom_model.kde.pkl"/>
654 </assert_command>
655 <output name="file_contents" file="expected/test_014.csv" ftype="csv"/>
656 </test>
657 <!-- test_015: window series validators; four window sizes ("5 7 9 11") with return_window_series enabled, output precision 3, custom KDE model -->
658 <test expect_num_outputs="1">
659 <section name="input_options">
660 <param name="input_files" value="csv/dihedrals.mini.csv"/>
661 <param name="input_degrees" value=""/> <!-- empty value: angles are treated as radians -->
662 <param name="input_precision" value="3"/>
663 </section>
664 <section name="conformational_state_model_options">
665 <conditional name="custom_model">
666 <param name="use_custom_model" value="true"/>
667 <conditional name="model_type_options">
668 <param name="model_type" value="kde"/>
669 <param name="bandwidth" value="0.15"/>
670 </conditional>
671 </conditional>
672 </section>
673 <section name="subsampling_options">
674 <conditional name="sampling_options">
675 <param name="subsampling_type" value="window"/>
676 <param name="window_size" value="5 7 9 11"/>
677 <param name="return_window_series" value="true"/>
678 </conditional>
679 </section>
680 <assert_command> <!-- with the series flag on, the sizes must be passed through the window-series option instead of the plain window option -->
681 <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
682 <has_text text="constava analyze -vv --precision 3 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window-series 5 7 9 11 --load-model custom_model.kde.pkl"/>
683 </assert_command>
684 <output name="file_contents" file="expected/test_015.csv" ftype="csv"/>
685 </test>
686 </tests>
687 <creator> <!-- people and organization credited for this tool; persons with an ORCID give the bare iD in identifier and the full link in url -->
688 <person name="José Gavalda-Garcia" honorificPrefix="Dr" identifier="0000-0001-6431-3442" url="https://orcid.org/0000-0001-6431-3442"/>
689 <person name="David Bickel" honorificPrefix="Dr" identifier="0000-0003-0332-8338" url="https://orcid.org/0000-0003-0332-8338"/>
690 <person name="Joel Roca-Martinez" honorificPrefix="Dr" identifier="0000-0002-4313-3845" url="https://orcid.org/0000-0002-4313-3845"/>
691 <person name="Daniele Raimondi" honorificPrefix="Dr" identifier="0000-0003-1157-1899" url="https://orcid.org/0000-0003-1157-1899"/>
692 <person name="Gabriele Orlando" honorificPrefix="Dr" identifier="0000-0002-5935-5258" url="https://orcid.org/0000-0002-5935-5258"/>
693 <person name="Wim F. Vranken" honorificPrefix="Dr" email="wim.vranken@vub.be" identifier="0000-0001-7470-4324" url="https://orcid.org/0000-0001-7470-4324"/>
694 <person name="Iman Jouiad"/>
695 <person name="Boris Depoortere" email="boris.depoortere@vib.be" identifier="0009-0002-2539-116X" url="https://orcid.org/0009-0002-2539-116X"/>
696 <person name="Adrián Díaz" email="adrian.diaz@vub.be" identifier="0000-0003-0165-1318" url="https://orcid.org/0000-0003-0165-1318"/>
697 <organization name="Bio2Byte, Vrije Universiteit Brussel (VUB)" address="Interuniversity Institute Bioinformatics Brussels, Université Libre de Bruxelles, 1050 Ixelles, Brussels, Belgium" url="https://bio2byte.be" email="bio2byte@vub.be" image="https://0.gravatar.com/avatar/2b51fb7600d876086669bcc85a941b763a81d1c2bb3c667b8c83a1aa892cf740"/> <!-- url is the organization home page, not the page of an individual tool -->
698 </creator>
699 <help><![CDATA[
700 `Constava <https://pypi.org/project/constava/>`_ analyzes conformational ensembles to calculate **conformational state propensities**
701 and **conformational state variability**.
702
703 **Conformational state propensities** describe how likely each residue is to occupy a given conformational state,
704 whereas **conformational state variability** measures the residue's ability to transition between conformational states.
705
706 Each conformational state is represented by a statistical model derived from the backbone dihedral angles (φ, ψ).
707 The default models were obtained from an analysis of NMR ensembles and chemical shifts.
708 To perform an analysis, you must provide φ- and ψ-angles for each conformational state in the ensemble.
709
710 The conformational states were defined according to residue behavior across NMR ensembles:
711
712 - **Core helix** (column ``coreHelix``): Residues that exclusively adopt a helical conformation in all models of their associated ensemble, with shiftCrypt values ≤ 0.2 (N = 93,957 residues).
713 - **Surrounding helix** (column ``surrHelix``): Residues that adopt a helical conformation in the majority of models, with shiftCrypt values in the range (0.2, 0.4] (N = 8,180 residues).
714 - **Core sheet** (column ``coreSheet``): Residues that exclusively adopt an extended conformation in all models, with shiftCrypt values ≥ 0.8 (N = 47,280 residues).
715 - **Surrounding sheet** (column ``surrSheet``): Residues that adopt an extended conformation in most models, with shiftCrypt values in the range [0.6, 0.8) (N = 11,280 residues).
716 - **Turn** (column ``Turn``): Residues that adopt a turn conformation in most models, with shiftCrypt values in the range (0.4, 0.6) (N = 75,377 residues).
717 - **Other** (column ``Other``): Residues that adopt a coil conformation in most models, also with shiftCrypt values in the range (0.4, 0.6) (N = 74,542 residues).
718
719 **Input Data and Parameters**
720
721 *Constava* requires backbone dihedral angles extracted from the conformational ensemble as input data.
722 These angles can be generated with *GROMACS* using the ``gmx chi`` module (set the input format to ``xvg``),
723 or they can be obtained using the Python submodule ``constava dihedrals``, which supports a wide range of molecular dynamics and structure formats.
724
725 - **Input files:** Provide files containing the dihedral angles. Supported formats include CSV and XVG.
726 - **Angle units:** Specify whether the dihedral angles in your files are expressed in radians or degrees.
727
728 .. class:: infomark
729
730 **Example files:** Example datasets in both formats are available in the
731 `data directory on GitHub <https://github.com/Bio2Byte/constava/tree/main/constava/data>`_.
732
733 The ``constava dihedrals`` submodule extracts backbone dihedral angles from conformational ensembles.
734 By default, it outputs the results in radians, which is the preferred format for ``constava analyze``.
735
736 **Kernel Options**
737
738 Configure the probability density functions (PDFs) used in the analysis. You can choose predefined PDFs
739 or fit custom ones from your own data.
740
741 **Subsampling Options**
742
743 You can apply different subsampling strategies, such as window-based analysis or bootstrap sampling,
744 to assess data variability and statistical robustness.
745
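746 - **Window size:** Define how many consecutive samples make up each moving window; several sizes can be given separated by spaces (e.g. ``3 5 7``).
747 - **Bootstrap size:** Specify how many samples are drawn for each bootstrap; several sizes can be given separated by spaces (e.g. ``10 20``). How often the bootstrap is repeated is set separately with the bootstrap samples parameter.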
748
749 **Results and Output Files**
750
751 *Constava* produces an output file containing the calculated variability and propensity measures.
752 The file format and level of detail depend on your configuration and the selected subsampling options.
753
754 - **Output precision:** Set the number of decimal places to include in the output file.
755
756 .. class:: warningmark
757
758 **Note:** Accurate results require careful parameter selection. Default settings are provided for convenience,
759 but they may need adjustment depending on your dataset and analysis goals.
760
761 **Project Links**
762
763 This tool uses the Python package ``constava``, available via PyPI and BioConda.
764
765 - Source code repository on `GitHub <https://github.com/Bio2Byte/constava>`_
766 - Python package on `PyPI <https://pypi.org/project/constava/>`_
767 - Conda recipe on `BioConda <https://bioconda.github.io/recipes/constava/README.html>`_
768 - Conda package on `Anaconda (BioConda channel) <https://anaconda.org/bioconda/constava>`_
769 - Tool profile on `Bio.Tools <https://bio.tools/constava>`_
770 ]]></help>
771 <citations> <!-- publications, referenced by DOI, to cite when using this tool -->
772 <citation type="doi">10.1093/nargab/lqae082</citation>
773 <citation type="doi">10.1016/j.jmb.2024.168900</citation>
774 </citations>
775 </tool>