Mercurial > repos > iuc > constava
diff constava.xml @ 0:2ed0df0360e5 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/constava commit 77814d75404602f3fb6b791dd79a17653de22d45
| author | iuc |
|---|---|
| date | Wed, 08 Oct 2025 20:13:34 +0000 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/constava.xml Wed Oct 08 20:13:34 2025 +0000 @@ -0,0 +1,775 @@ +<tool id="constava" name="Constava" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="GPL-3.0-only"> + <description> + calculates conformational-state probabilities and variability in structural ensembles + </description> + <macros> + <import>macros.xml</import> + </macros> + <edam_topics> + <edam_topic>topic_0130</edam_topic> + </edam_topics> + <edam_operations> + <edam_operation>operation_0249</edam_operation> + </edam_operations> + <xrefs> + <xref type="bio.tools">constava</xref> + <!-- https://bio.tools/constava --> + </xrefs> + <expand macro="requirements"/> + <version_command>constava --version</version_command> + <command detect_errors="aggressive" strict="true"> + <![CDATA[ + echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Starting Constava Tool execution from Galaxy Platform" && + #for $input_file_id, $input_file in enumerate( $input_options.input_files ): + #if $input_file + #set ref_name = str($input_file.element_identifier) + ln -sv '${input_file}' '$ref_name' && + #end if + #end for + + #set angle_units = $input_options.input_degrees + echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Constava will use '$angle_units' as angle units (empty means Radians)" && + + #if str( $conformational_state_model_options.use_custom_model ) == "true": + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Using custom model" && + + #if str( $conformational_state_model_options.use_custom_input_file ) == "true": + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Using custom input file for data training: $conformational_state_model_options.custom_input_file.element_identifier" && + + #set custom_input_file_angle_units = $conformational_state_model_options.custom_input_file_degrees + #set ref_name_train_data = 
str($conformational_state_model_options.custom_input_file.element_identifier) + #if str( custom_input_file_angle_units ) != "": + #set custom_input_parameter="--input " + str( $conformational_state_model_options.custom_input_file.element_identifier ) + " " + $custom_input_file_angle_units + #else: + #set custom_input_parameter="--input " + str( $conformational_state_model_options.custom_input_file.element_identifier ) + #end if + + ln -sv '${conformational_state_model_options.custom_input_file}' '$ref_name_train_data' && + #else: + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Not using custom input file for data training" && + #set custom_input_parameter="" + #end if + + #if str( $conformational_state_model_options.model_type ) == "kde": + #set model_file = "custom_model.kde.pkl" + #set load_model_param="--load-model custom_model.kde.pkl" + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Invoking the 'constava fit-model' module command for KDE" && + + constava fit-model -vv + --model-type kde + --kde-bandwidth $conformational_state_model_options.bandwidth + --output $model_file + #if str( $custom_input_parameter ) != "": + $custom_input_parameter + #end if + && + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] The 'constava fit-model' module command for KDE has finished" && + #else if str( $conformational_state_model_options.model_type ) == "grid": + #set model_file = "custom_model.grid.pkl" + #set load_model_param="--load-model custom_model.grid.pkl" + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Preparing the 'constava fit-model' module command for GRID" && + + constava fit-model -vv + --model-type grid + --grid-points $custom_model_grid_points + --kde-bandwidth $conformational_state_model_options.bandwidth + --output $model_file + #if str( $custom_input_parameter ) != "": + $custom_input_parameter + #end if + && + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] The 'constava fit-model' module 
command for GRID has finished" && + #else: + #set load_model_param="" + #end if + + #else: + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Not using custom model" && + #set load_model_param="" + #end if + + echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Invoking the 'constava analyze' module command" && + + constava analyze -vv --precision $input_precision --input + #for $input_file_id, $input_file in enumerate( $input_files ): + #if $input_file: + #set ref_name = str($input_file.element_identifier) + #set input_format = str($input_file.ext) + $ref_name + #end if + #end for + --input-format $input_format + --output output_constava.csv + #if str( $subsampling_type ) == "window": + #if str( $return_window_series ) == "true": + --window-series $subsampling_options.window_size + #else: + --window $subsampling_options.window_size + #end if + #else if str( $subsampling_type ) == "bootstrap": + #if str( $return_bootstrap_series ) == "true": + --bootstrap-series $bootstrap_size + #else: + --bootstrap $bootstrap_size + #end if + --bootstrap-samples $bootstrap_samples --seed $bootstrap_seed + #end if + #if str ($load_model_param ) != "": + $load_model_param + #end if + #if str( $angle_units ) != "": + $angle_units + #end if + && + echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Constava execution from Galaxy has finished" + ]]></command> + <environment_variables> + <environment_variable name="MPLBACKEND">Agg</environment_variable> + <environment_variable name="TQDM_DISABLE">1</environment_variable> + <environment_variable name="PYTHON_TQDM_DISABLE">1</environment_variable> + </environment_variables> + <inputs> + <section name="input_options" title="Input Options" expanded="true" help="As input data the backbone dihedral angles extracted from the conformational ensemble need to be provided."> + <param name="input_files" type="data" format="csv,xvg" label="Dihedral angles file" help="Upload the input file(s) that contain the 
dihedral angles in CSV or XVG format (GROMACS' `gmx chi` module). Important: Given Constava extracts RESNAME and RESINDEX from filenames when using XVG format, your files must follow this regex 'ramaPhiPsi([A-Z][A-Z0-9][A-Z0-9])([0-9]+).xvg'" multiple="true" argument="input"/> + <param name="input_degrees" type="select" label="Are the dihedral angles in that file in radians or degrees?" help="Indicate if the dihedral angles are in radians or degrees." argument="degrees"> + <option value="--degrees">Degrees</option> + <option value="" selected="true">Radians</option> + </param> + <param name="input_precision" type="integer" label="Decimal precision" help="Sets the number of decimals in the output files." value="3" default_value="3" min="1" max="16" argument="precision"></param> + </section> + <section name="conformational_state_model_options" title="Kernel Options" help="By default, the conformational state models are generated on-the-fly when running Constava. In selected cases generating a model beforehand and loading it can be useful, though."> + <conditional name="custom_model"> + <param name="use_custom_model" type="select" label="Do you want to train a custom probabilistic model of conformational states?"> + <option value="false" selected="true">No</option> + <option value="true">Yes</option> + </param> + <when value="true"> <!-- Yes, train a custom probabilistic model of conformational states--> + <conditional name="model_type_options"> + <param name="model_type" type="select" label="Select a model type" help="We provide two model types. KDE models are the default. They are fast to fit but may be slow in the inference in large conformational ensembles (e.g. long-timescale MD simulations). The idea of Grid models is, to replace the continuous probability density function of the kde-Model by a fixed set of grid-points. The PDF for any sample is then estimated by linear interpolation between the nearest grid points. 
This is slightly less accurate than the kde-Model but speeds up inference significantly."> + <option value="kde">KDE model</option> + <option value="grid">Grid model</option> + </param> + <when value="kde"> + <param name="bandwidth" type="float" label="Which bandwidth do you want to use?" value="0.13" help="This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13)" argument="kde-bandwidth"/> + </when> + <when value="grid"> + <param name="bandwidth" type="float" label="Which bandwidth do you want to use?" value="0.13" help="This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13)" argument="kde-bandwidth"/> + <param name="custom_model_grid_points" type="integer" label="Grid points" value="10000" help="This flag controls how many grid points are used to describe the probability density function. (default: 10000)" argument="grid-points"/> + </when> + </conditional> + <conditional name="custom_input_json"> + <param name="use_custom_input_file" type="select" label="Do you want to train probability density functions with custom data?" help="If not, the default data from the publication will be used."> + <option value="false" selected="true">No</option> + <option value="true">Yes</option> + </param> + <when value="true"> + <param name="custom_input_file" type="data" format="json" label="File for pdf fitting in JSON format" help="The data to which the new conformational state models will be fitted. It should be provided as a JSON file. The top-most key should indicate the names of the conformational states. On the level below, lists of phi-psi pairs for each state should be provided." argument="input"/> + <param name="custom_input_file_degrees" type="select" label="Are the dihedral angles in that file in radians or degrees?" help="Indicate if the dihedral angles of the training data file are in radians or degrees."
argument="degrees"> + <option value="--degrees">Degrees</option> + <option value="" selected="true">Radians</option> + </param> + </when> + <when value="false"/> + </conditional> + </when> + <when value="false"/> + </conditional> + </section> + <section name="subsampling_options" title="Subsampling Options" help="Do inference using either a moving reading-frame of consecutive samples (sliding window) or samples obtained through bootstrapping (bootstrap)."> + <conditional name="sampling_options"> + <param name="subsampling_type" type="select" label="Select a subsampling method to configure" help="You must select and configure at least one subsampling option."> + <option value="window" selected="true">Sliding window</option> + <option value="bootstrap">Bootstrap sampling</option> + </param> + <when value="window"> + <param name="window_size" type="text" label="Window size (space-separated integers)" value="3" help="Specify window sizes for moving frame analysis, e.g., '3 5 7'. Each reading frame consists of consecutive samples. Multiple values can be provided." argument="window"> + <!-- Integers separated by exactly one space; the pattern has no nested quantifiers, so it cannot backtrack catastrophically on malformed input. --> + <validator type="regex" message="Use one or more integers separated by single spaces.">^\d+( \d+)*$</validator> + </param> + <param name="return_window_series" type="boolean" label="Return the results for every window rather than the average." help="Return the results for every window rather than the average. This can result in very large output files." value="false" argument="window-series"/> + </when> + <when value="bootstrap"> + <param name="bootstrap_size" type="text" label="Bootstrap size (space-separated integers)" value="3" help="Do inference using N samples obtained through bootstrapping. Specify bootstrap sizes, e.g., '10 20 30'. Multiple values can be provided."
argument="bootstrap"> + <!-- Integers separated by exactly one space; the pattern has no nested quantifiers, so it cannot backtrack catastrophically on malformed input. --> + <validator type="regex" message="Use one or more integers separated by single spaces.">^\d+( \d+)*$</validator> + </param> + <param name="return_bootstrap_series" type="boolean" label="Return bootstrap series calculation" help="Return the results for every subsample rather than the average. This can result in very large output files." value="false" argument="bootstrap-series"/> + <param name="bootstrap_samples" type="integer" label="Bootstrap samples" value="10000" min="1" help="When bootstrapping, sample this many times from the input data. (default: 10000)" argument="bootstrap-samples"/> + <param name="bootstrap_seed" type="integer" label="Bootstrap seed" value="42" min="1" help="Set random seed for bootstrap sampling." argument="seed"/> + </when> + </conditional> + </section> + </inputs> + <outputs> + <data name="file_contents" format="csv" label="${tool.name} on ${on_string}: File Contents" from_work_dir="output_constava.csv"/> + </outputs> + <tests> + <!-- ======================================================================= --> + <!-- Test scenarios: Default PDF --> + <!-- ======================================================================= --> + + <!-- test_001: CSV Dihedrals in radians with window_size 3 using default PDF --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="10"/> + </section> + <assert_command> + <has_text text="constava analyze -vv --precision 10 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3"/> + <not_has_text text="constava fit-model"/> + </assert_command> + <output name="file_contents" file="expected/test_001.csv" ftype="csv"/> + </test> + + <!-- test_002: XVG Dihedrals in degrees with window_size 3 using default PDF --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files"
value="xvg/ramaPhiPsiALA18.xvg,xvg/ramaPhiPsiGLY11.xvg,xvg/ramaPhiPsiLEU45.xvg,xvg/ramaPhiPsiTHR2.xvg,xvg/ramaPhiPsiALA19.xvg,xvg/ramaPhiPsiGLY22.xvg,xvg/ramaPhiPsiLYS16.xvg,xvg/ramaPhiPsiTHR4.xvg,xvg/ramaPhiPsiALA26.xvg,xvg/ramaPhiPsiGLY33.xvg,xvg/ramaPhiPsiLYS44.xvg,xvg/ramaPhiPsiTHR7.xvg,xvg/ramaPhiPsiARG29.xvg,xvg/ramaPhiPsiGLY36.xvg,xvg/ramaPhiPsiLYS48.xvg,xvg/ramaPhiPsiTRP31.xvg,xvg/ramaPhiPsiASN15.xvg,xvg/ramaPhiPsiGLY42.xvg,xvg/ramaPhiPsiLYS49.xvg,xvg/ramaPhiPsiTYR21.xvg,xvg/ramaPhiPsiASN27.xvg,xvg/ramaPhiPsiGLY50.xvg,xvg/ramaPhiPsiLYS9.xvg,xvg/ramaPhiPsiTYR5.xvg,xvg/ramaPhiPsiASN3.xvg,xvg/ramaPhiPsiILE17.xvg,xvg/ramaPhiPsiPHE40.xvg,xvg/ramaPhiPsiTYR6.xvg,xvg/ramaPhiPsiASN32.xvg,xvg/ramaPhiPsiILE34.xvg,xvg/ramaPhiPsiSER10.xvg,xvg/ramaPhiPsiVAL23.xvg,xvg/ramaPhiPsiASP12.xvg,xvg/ramaPhiPsiILE39.xvg,xvg/ramaPhiPsiSER24.xvg,xvg/ramaPhiPsiVAL25.xvg,xvg/ramaPhiPsiASP37.xvg,xvg/ramaPhiPsiILE46.xvg,xvg/ramaPhiPsiSER30.xvg,xvg/ramaPhiPsiVAL41.xvg,xvg/ramaPhiPsiGLN20.xvg,xvg/ramaPhiPsiLEU14.xvg,xvg/ramaPhiPsiSER35.xvg,xvg/ramaPhiPsiVAL47.xvg,xvg/ramaPhiPsiGLN43.xvg,xvg/ramaPhiPsiLEU28.xvg,xvg/ramaPhiPsiSER51.xvg,xvg/ramaPhiPsiVAL8.xvg,xvg/ramaPhiPsiGLY1.xvg,xvg/ramaPhiPsiLEU38.xvg,xvg/ramaPhiPsiTHR13.xvg" ftype="xvg"/> + <param name="input_degrees" value="--degrees"/> + <param name="input_precision" value="5"/> + </section> + <assert_command> + <has_text text="constava analyze -vv --precision 5 --input"/> + <has_text text="ramaPhiPsiALA18.xvg"/> + <has_text text="ramaPhiPsiGLY11.xvg"/> + <has_text text="ramaPhiPsiLEU45.xvg"/> + <has_text text="ramaPhiPsiTHR2.xvg"/> + <has_text text="ramaPhiPsiALA19.xvg"/> + <has_text text="ramaPhiPsiGLY22.xvg"/> + <has_text text="ramaPhiPsiLYS16.xvg"/> + <has_text text="ramaPhiPsiTHR4.xvg"/> + <has_text text="ramaPhiPsiALA26.xvg"/> + <has_text text="ramaPhiPsiGLY33.xvg"/> + <has_text text="ramaPhiPsiLYS44.xvg"/> + <has_text text="ramaPhiPsiTHR7.xvg"/> + <has_text text="ramaPhiPsiARG29.xvg"/> + <has_text text="ramaPhiPsiGLY36.xvg"/> + 
<has_text text="ramaPhiPsiLYS48.xvg"/> + <has_text text="ramaPhiPsiTRP31.xvg"/> + <has_text text="ramaPhiPsiASN15.xvg"/> + <has_text text="ramaPhiPsiGLY42.xvg"/> + <has_text text="ramaPhiPsiLYS49.xvg"/> + <has_text text="ramaPhiPsiTYR21.xvg"/> + <has_text text="ramaPhiPsiASN27.xvg"/> + <has_text text="ramaPhiPsiGLY50.xvg"/> + <has_text text="ramaPhiPsiLYS9.xvg"/> + <has_text text="ramaPhiPsiTYR5.xvg"/> + <has_text text="ramaPhiPsiASN3.xvg"/> + <has_text text="ramaPhiPsiILE17.xvg"/> + <has_text text="ramaPhiPsiPHE40.xvg"/> + <has_text text="ramaPhiPsiTYR6.xvg"/> + <has_text text="ramaPhiPsiASN32.xvg"/> + <has_text text="ramaPhiPsiILE34.xvg"/> + <has_text text="ramaPhiPsiSER10.xvg"/> + <has_text text="ramaPhiPsiVAL23.xvg"/> + <has_text text="ramaPhiPsiASP12.xvg"/> + <has_text text="ramaPhiPsiILE39.xvg"/> + <has_text text="ramaPhiPsiSER24.xvg"/> + <has_text text="ramaPhiPsiVAL25.xvg"/> + <has_text text="ramaPhiPsiASP37.xvg"/> + <has_text text="ramaPhiPsiILE46.xvg"/> + <has_text text="ramaPhiPsiSER30.xvg"/> + <has_text text="ramaPhiPsiVAL41.xvg"/> + <has_text text="ramaPhiPsiGLN20.xvg"/> + <has_text text="ramaPhiPsiLEU14.xvg"/> + <has_text text="ramaPhiPsiSER35.xvg"/> + <has_text text="ramaPhiPsiVAL47.xvg"/> + <has_text text="ramaPhiPsiGLN43.xvg"/> + <has_text text="ramaPhiPsiLEU28.xvg"/> + <has_text text="ramaPhiPsiSER51.xvg"/> + <has_text text="ramaPhiPsiVAL8.xvg"/> + <has_text text="ramaPhiPsiGLY1.xvg"/> + <has_text text="ramaPhiPsiLEU38.xvg"/> + <has_text text="ramaPhiPsiTHR13.xvg"/> + <has_text text="--input-format xvg --output output_constava.csv --window 3 --degrees"/> + <not_has_text text="constava fit-model"/> + </assert_command> + <output name="file_contents" file="expected/test_002.csv" ftype="csv"/> + </test> + + <!-- ======================================================================= --> + <!-- Test scenarios: Custom PDF with default training data --> + <!-- ======================================================================= --> + + <!-- test_003: 
custom probability density functions: kde --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_003.csv" ftype="csv"/> + </test> + + <!-- test_004: custom probability density functions: grid --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="grid"/> + <param name="bandwidth" value="0.15"/> + <param name="custom_model_grid_points" value="1500"/> + </conditional> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type grid --grid-points 1500 --kde-bandwidth 0.15 --output custom_model.grid.pkl"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.grid.pkl"/> + </assert_command> + 
<output name="file_contents" file="expected/test_004.csv" ftype="csv"/> + </test> + + <!-- ======================================================================= --> + <!-- Test scenarios: Custom PDF with custom training data in JSON --> + <!-- ======================================================================= --> + + <!-- test_005: custom probability density functions with custom json: kde --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + <conditional name="custom_input_json"> + <param name="use_custom_input_file" value="true"/> + <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/> + </conditional> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_005.csv" ftype="csv"/> + </test> + + <!-- test_006: custom probability density functions with custom json: grid --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param 
name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="grid"/> + <param name="bandwidth" value="0.15"/> + <param name="custom_model_grid_points" value="1500"/> + </conditional> + <conditional name="custom_input_json"> + <param name="use_custom_input_file" value="true"/> + <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/> + </conditional> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type grid --grid-points 1500 --kde-bandwidth 0.15 --output custom_model.grid.pkl --input constava_csdata.mini.json"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.grid.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_006.csv" ftype="csv"/> + </test> + <!-- ======================================================================= --> + <!-- Test scenarios: Default PDF with bootstrap --> + <!-- ======================================================================= --> + + <!-- test_007: CSV Dihedrals in radians with bootstrap with default train data --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="false"/> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="3"/> + <param name="return_bootstrap_series" value="false"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="89"/> + </conditional> + </section> + 
<assert_command> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89"/> + <not_has_text text="constava fit-model"/> + </assert_command> + <output name="file_contents" file="expected/test_007.csv" ftype="csv"/> + </test> + + <!-- test_008: CSV Dihedrals in radians with bootstrap series with default train data --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="false"/> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="3"/> + <param name="return_bootstrap_series" value="true"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="89"/> + </conditional> + </section> + <assert_command> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89"/> + <not_has_text text="constava fit-model"/> + </assert_command> + <output name="file_contents" file="expected/test_008.csv" ftype="csv"/> + </test> + + <!-- ======================================================================= --> + <!-- Test scenarios: Custom PDF with bootstrap --> + <!-- ======================================================================= --> + + <!-- test_009: CSV Dihedrals in radians with bootstrap using custom PDF (kde) using default data to train the PDF --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" 
value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="3"/> + <param name="return_bootstrap_series" value="false"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="89"/> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_009.csv" ftype="csv"/> + </test> + + <!-- test_010: CSV Dihedrals in radians with bootstrap using custom PDF (kde) using custom training data in json --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + <conditional name="custom_input_json"> + <param name="use_custom_input_file" value="true"/> + <param name="custom_input_file" 
value="custom-training-data/constava_csdata.mini.json"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="3"/> + <param name="return_bootstrap_series" value="false"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="89"/> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_010.csv" ftype="csv"/> + </test> + + <!-- test_011: CSV Dihedrals in radians with bootstrap series using custom PDF (kde) using default data to train the PDF --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="3"/> + <param name="return_bootstrap_series" value="true"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="89"/> + </conditional> + </section> + <assert_command> + <has_text text="constava 
fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_011.csv" ftype="csv"/> + </test> + + <!-- test_012: CSV Dihedrals in radians with bootstrap series using custom PDF (kde) using custom training data in json --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + <conditional name="custom_input_json"> + <param name="use_custom_input_file" value="true"/> + <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="3"/> + <param name="return_bootstrap_series" value="true"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="89"/> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89 --load-model 
custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_012.csv" ftype="csv"/> + </test> + + <!-- test_013: Testing the bootstrap validators --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="bootstrap"/> + <param name="bootstrap_size" value="10 20"/> + <param name="return_bootstrap_series" value="false"/> + <param name="bootstrap_samples" value="10"/> + <param name="bootstrap_seed" value="18"/> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 10 20 --bootstrap-samples 10 --seed 18 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_013.csv" ftype="csv"/> + </test> + <!-- test_014: Testing the window validators --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="5"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param 
name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="window"/> + <param name="window_size" value="3 5 7"/> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/> + <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 5 7 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_014.csv" ftype="csv"/> + </test> + <!-- test_015: Window-series validator check. Passes four window sizes (5, 7, 9 and 11) with return_window_series enabled and output precision 3, using a custom KDE model (bandwidth 0.15). The assertions expect the window-series option followed by all four sizes in the analyze command. --> + <test expect_num_outputs="1"> + <section name="input_options"> + <param name="input_files" value="csv/dihedrals.mini.csv"/> + <param name="input_degrees" value=""/> + <param name="input_precision" value="3"/> + </section> + <section name="conformational_state_model_options"> + <conditional name="custom_model"> + <param name="use_custom_model" value="true"/> + <conditional name="model_type_options"> + <param name="model_type" value="kde"/> + <param name="bandwidth" value="0.15"/> + </conditional> + </conditional> + </section> + <section name="subsampling_options"> + <conditional name="sampling_options"> + <param name="subsampling_type" value="window"/> + <param name="window_size" value="5 7 9 11"/> + <param name="return_window_series" value="true"/> + </conditional> + </section> + <assert_command> + <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/> + <has_text text="constava analyze -vv --precision 3 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window-series 5 7 9 11 --load-model custom_model.kde.pkl"/> + </assert_command> + <output name="file_contents" file="expected/test_015.csv"
ftype="csv"/> + </test> + </tests> + <!-- People and organization credited for this tool. The organization URL is the Bio2Byte group homepage, not the page of another tool. --> + <creator> + <person name="José Gavalda-Garcia" honorificPrefix="Dr" identifier="0000-0001-6431-3442" url="https://orcid.org/0000-0001-6431-3442"/> + <person name="David Bickel" honorificPrefix="Dr" identifier="0000-0003-0332-8338" url="https://orcid.org/0000-0003-0332-8338"/> + <person name="Joel Roca-Martinez" honorificPrefix="Dr" identifier="0000-0002-4313-3845" url="https://orcid.org/0000-0002-4313-3845"/> + <person name="Daniele Raimondi" honorificPrefix="Dr" identifier="0000-0003-1157-1899" url="https://orcid.org/0000-0003-1157-1899"/> + <person name="Gabriele Orlando" honorificPrefix="Dr" identifier="0000-0002-5935-5258" url="https://orcid.org/0000-0002-5935-5258"/> + <person name="Wim F. Vranken" honorificPrefix="Dr" email="wim.vranken@vub.be" identifier="0000-0001-7470-4324" url="https://orcid.org/0000-0001-7470-4324"/> + <person name="Iman Jouiad" /> + <person name="Boris Depoortere" email="boris.depoortere@vib.be" identifier="0009-0002-2539-116X" url="https://orcid.org/0009-0002-2539-116X"/> + <person name="Adrián Díaz" email="adrian.diaz@vub.be" identifier="0000-0003-0165-1318" url="https://orcid.org/0000-0003-0165-1318"/> + <organization name="Bio2Byte, Vrije Universiteit Brussel (VUB)" address="Interuniversity Institute Bioinformatics Brussels, Université Libre de Bruxelles, 1050 Ixelles, Brussels, Belgium" url="https://bio2byte.be" email="bio2byte@vub.be" image="https://0.gravatar.com/avatar/2b51fb7600d876086669bcc85a941b763a81d1c2bb3c667b8c83a1aa892cf740"/> + </creator> + <help><![CDATA[ + `Constava <https://pypi.org/project/constava/>`_ analyzes conformational ensembles to calculate **conformational state propensities** + and **conformational state variability**. + + **Conformational state propensities** describe how likely each residue is to occupy a given conformational state, + whereas **conformational state variability** measures the residue's ability to transition between conformational states.
+ + Each conformational state is represented by a statistical model derived from the backbone dihedral angles (φ, ψ). + The default models were obtained from an analysis of NMR ensembles and chemical shifts. + To perform an analysis, you must provide the φ- and ψ-angles of the residues for every conformer in the ensemble. + + The conformational states were defined according to residue behavior across NMR ensembles: + + - **Core helix** (column ``coreHelix``): Residues that exclusively adopt a helical conformation in all models of their associated ensemble, with shiftCrypt values ≤ 0.2 (N = 93,957 residues). + - **Surrounding helix** (column ``surrHelix``): Residues that adopt a helical conformation in the majority of models, with shiftCrypt values in the range (0.2, 0.4] (N = 8,180 residues). + - **Core sheet** (column ``coreSheet``): Residues that exclusively adopt an extended conformation in all models, with shiftCrypt values ≥ 0.8 (N = 47,280 residues). + - **Surrounding sheet** (column ``surrSheet``): Residues that adopt an extended conformation in most models, with shiftCrypt values in the range [0.6, 0.8) (N = 11,280 residues). + - **Turn** (column ``Turn``): Residues that adopt a turn conformation in most models, with shiftCrypt values in the range (0.4, 0.6) (N = 75,377 residues). + - **Other** (column ``Other``): Residues that adopt a coil conformation in most models, also with shiftCrypt values in the range (0.4, 0.6) (N = 74,542 residues). + + **Input Data and Parameters** + + *Constava* requires backbone dihedral angles extracted from the conformational ensemble as input data. + These angles can be generated with *GROMACS* using the ``gmx chi`` module (set the input format to ``xvg``), + or they can be obtained using the Python submodule ``constava dihedrals``, which supports a wide range of molecular dynamics and structure formats. + + - **Input files:** Provide files containing the dihedral angles. Supported formats include CSV and XVG.
+ - **Angle units:** Specify whether the dihedral angles in your files are expressed in radians or degrees. + + .. class:: infomark + + **Example files:** Example datasets in both formats are available in the + `data directory on GitHub <https://github.com/Bio2Byte/constava/tree/main/constava/data>`_. + + The ``constava dihedrals`` submodule extracts backbone dihedral angles from conformational ensembles. + By default, it outputs the results in radians, which is the preferred format for ``constava analyze``. + + **Kernel Options** + + Configure the probability density functions (PDFs) used in the analysis. You can choose predefined PDFs + or fit custom ones from your own data. + + **Subsampling Options** + + You can apply different subsampling strategies, such as window-based analysis or bootstrap sampling, + to assess data variability and statistical robustness. + + - **Window size:** Define how many consecutive samples (frames) each moving window contains. Several sizes can be given, separated by spaces. + - **Bootstrap size:** Specify how many samples are drawn for each bootstrap. Several sizes can be given, separated by spaces. The number of bootstrap repetitions is set separately with *Bootstrap samples*. + + **Results and Output Files** + + *Constava* produces an output file containing the calculated variability and propensity measures. + The file format and level of detail depend on your configuration and the selected subsampling options. + + - **Output precision:** Set the number of decimal places to include in the output file. + + .. class:: warningmark + + **Note:** Accurate results require careful parameter selection. Default settings are provided for convenience, + but they may need adjustment depending on your dataset and analysis goals. + + **Project Links** + + This tool uses the Python package ``constava``, available via PyPI and BioConda.
+ + - Source code repository on `GitHub <https://github.com/Bio2Byte/constava>`_ + - Python package on `PyPI <https://pypi.org/project/constava/>`_ + - Conda recipe on `BioConda <https://bioconda.github.io/recipes/constava/README.html>`_ + - Conda package on `Anaconda (BioConda channel) <https://anaconda.org/bioconda/constava>`_ + - Tool profile on `Bio.Tools <https://bio.tools/constava>`_ + ]]></help> + <!-- Publications to cite for this tool, each given as a DOI. --> <citations> + <citation type="doi">10.1093/nargab/lqae082</citation> + <citation type="doi">10.1016/j.jmb.2024.168900</citation> + </citations> +</tool>
