<!-- Mercurial > repos > iuc > constava -->

<tool id="constava" name="Constava" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="GPL-3.0-only">
    <!-- Short tagline displayed next to the tool name. -->
    <description>calculates conformational-state probabilities and variability in structural ensembles</description>
    <!-- macros.xml supplies the "requirements" macro expanded below; the @...@ tokens used
         on the tool element are presumably defined there as well (file not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- EDAM ontology annotations (topic and operation identifiers). -->
    <edam_topics>
        <edam_topic>topic_0130</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_0249</edam_operation>
    </edam_operations>
    <!-- Registry cross-reference: https://bio.tools/constava -->
    <xrefs>
        <xref type="bio.tools">constava</xref>
    </xrefs>
    <expand macro="requirements"/>
    <!-- Command used to record the version of the wrapped program. -->
    <version_command>constava --version</version_command>
    <!--
        Command template (Cheetah rendering a bash command chain joined with "&&").

        Steps, in order:
          1. Symlink every input dataset into the working directory under its element
             identifier, so constava sees the original file names.
          2. If a custom model was requested, run "constava fit-model" (KDE or grid),
             optionally on a user-supplied JSON training file that is symlinked the same way,
             writing custom_model.kde.pkl or custom_model.grid.pkl.
          3. Run "constava analyze" on the linked inputs with the chosen subsampling method
             and, when step 2 ran, the freshly fitted model. Output goes to output_constava.csv.

        All parameters are referenced by their fully qualified path
        (section, then conditional, then parameter) so that resolution does not depend on
        any fallback lookup of shortened names.

        NOTE(review): element identifiers are emitted unquoted on the constava command lines,
        so identifiers containing spaces or shell metacharacters will break the command.
        Quoting them would change the rendered text that the assert_command tests match
        against, so it is left as is here.
        NOTE(review): the input format passed to "constava analyze" is taken from the last
        input dataset; mixed csv and xvg selections are not detected.
    -->
    <command detect_errors="aggressive" strict="true">
        <![CDATA[
        echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Starting Constava Tool execution from Galaxy Platform" &&
        #for $input_file in $input_options.input_files:
            #if $input_file:
                #set ref_name = str($input_file.element_identifier)
                ln -sv '${input_file}' '$ref_name' &&
            #end if
        #end for

        #set angle_units = $input_options.input_degrees
        echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Constava will use '$angle_units' as angle units (empty means Radians)" &&

        #set model_opts = $conformational_state_model_options.custom_model
        #if str( $model_opts.use_custom_model ) == "true":
            #set kernel_opts = $model_opts.model_type_options
            #set training_opts = $model_opts.custom_input_json
            echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Using custom model" &&

            #if str( $training_opts.use_custom_input_file ) == "true":
                #set ref_name_train_data = str( $training_opts.custom_input_file.element_identifier )
                #set custom_input_file_angle_units = str( $training_opts.custom_input_file_degrees )
                echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Using custom input file for data training: $ref_name_train_data" &&

                #set custom_input_parameter = "--input " + $ref_name_train_data
                #if $custom_input_file_angle_units != "":
                    #set custom_input_parameter = $custom_input_parameter + " " + $custom_input_file_angle_units
                #end if

                ln -sv '${training_opts.custom_input_file}' '$ref_name_train_data' &&
            #else:
                echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Not using custom input file for data training" &&
                #set custom_input_parameter=""
            #end if

            #if str( $kernel_opts.model_type ) == "kde":
                #set model_file = "custom_model.kde.pkl"
                #set load_model_param = "--load-model " + $model_file
                echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Invoking the 'constava fit-model' module command for KDE" &&

                constava fit-model -vv
                    --model-type kde
                    --kde-bandwidth $kernel_opts.bandwidth
                    --output $model_file
                #if str( $custom_input_parameter ) != "":
                $custom_input_parameter
                #end if
                &&
                echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] The 'constava fit-model' module command for KDE has finished" &&
            #else if str( $kernel_opts.model_type ) == "grid":
                #set model_file = "custom_model.grid.pkl"
                #set load_model_param = "--load-model " + $model_file
                echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Preparing the 'constava fit-model' module command for GRID" &&

                constava fit-model -vv
                    --model-type grid
                    --grid-points $kernel_opts.custom_model_grid_points
                    --kde-bandwidth $kernel_opts.bandwidth
                    --output $model_file
                #if str( $custom_input_parameter ) != "":
                    $custom_input_parameter
                #end if
                &&
                echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] The 'constava fit-model' module command for GRID has finished" &&
            #else:
                #set load_model_param=""
            #end if

        #else:
            echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Not using custom model" &&
            #set load_model_param=""
        #end if

        #set sampling_opts = $subsampling_options.sampling_options
        echo "[Galaxy command block \$(date +"%Y-%m-%dT%H:%M:%S%z")] Invoking the 'constava analyze' module command" &&

        constava analyze -vv --precision $input_options.input_precision --input
        #for $input_file in $input_options.input_files:
            #if $input_file:
                #set ref_name = str($input_file.element_identifier)
                #set input_format = str($input_file.ext)
                $ref_name
            #end if
        #end for
            --input-format $input_format
            --output output_constava.csv
        #if str( $sampling_opts.subsampling_type ) == "window":
            #if str( $sampling_opts.return_window_series ) == "true":
                --window-series $sampling_opts.window_size
            #else:
                --window $sampling_opts.window_size
            #end if
        #else if str( $sampling_opts.subsampling_type ) == "bootstrap":
            #if str( $sampling_opts.return_bootstrap_series ) == "true":
                --bootstrap-series $sampling_opts.bootstrap_size
            #else:
                --bootstrap $sampling_opts.bootstrap_size
            #end if
            --bootstrap-samples $sampling_opts.bootstrap_samples --seed $sampling_opts.bootstrap_seed
        #end if
        #if str( $load_model_param ) != "":
            $load_model_param
        #end if
        #if str( $angle_units ) != "":
            $angle_units
        #end if
        &&
        echo "[Galaxy command block at \$(date +'%Y-%m-%dT%H:%M:%S%z')] Constava execution from Galaxy has finished"
        ]]></command>
    <!-- Environment variables exported for the job.
         NOTE(review): MPLBACKEND=Agg presumably selects a non-interactive matplotlib backend
         and the two TQDM variables presumably silence progress bars; confirm against the
         constava dependencies, which are not visible in this file. -->
    <environment_variables>
        <environment_variable name="MPLBACKEND">Agg</environment_variable>
        <environment_variable name="TQDM_DISABLE">1</environment_variable>
        <environment_variable name="PYTHON_TQDM_DISABLE">1</environment_variable>
    </environment_variables>
    <inputs>
        <!-- Input data: one or more dihedral-angle datasets (CSV or XVG), the unit of the
             angles, and the decimal precision used for the result table. The select values
             of "input_degrees" are emitted verbatim on the command line, so the empty value
             (radians) adds no flag. -->
        <section name="input_options" title="Input Options" expanded="true" help="As input data the backbone dihedral angles extracted from the conformational ensemble need to be provided.">
            <param name="input_files" type="data" format="csv,xvg" label="Dihedral angles file" help="Upload the input file(s) that contain the dihedral angles in CSV or XVG format (GROMACS' `gmx chi` module). Important: Given Constava extracts RESNAME and RESINDEX from filenames when using XVG format, your files must follow this regex 'ramaPhiPsi([A-Z][A-Z0-9][A-Z0-9])([0-9]+).xvg'" multiple="true" argument="input"/>
            <param name="input_degrees" type="select" label="Are the dihedral angles in that file in radians or degrees?" help="Indicate if the dihedral angles are in radians or degrees." argument="degrees">
                <option value="--degrees">Degrees</option>
                <option value="" selected="true">Radians</option>
            </param>
            <!-- The default comes from "value"; the former "default_value" attribute was
                 dropped because it has no meaning for integer parameters. -->
            <param name="input_precision" type="integer" label="Decimal precision" help="Sets the number of decimals in the output files." value="3" min="1" max="16" argument="precision"/>
        </section>
        <!-- Optional custom conformational-state model.
             Layout: the "custom_model" conditional switches model fitting on or off. When on,
             "model_type_options" selects KDE or grid (both expose "bandwidth"; grid adds
             "custom_model_grid_points") and "custom_input_json" optionally supplies a JSON
             training file together with its angle unit. -->
        <section name="conformational_state_model_options" title="Kernel Options" help="By default, the conformational state models are generated on-the-fly when running Constava. In selected cases generating a model beforehand and loading it can be useful, though.">
            <conditional name="custom_model">
                <param name="use_custom_model" type="select" label="Do you want to train a custom probabilistic model of conformational states?">
                    <option value="false" selected="true">No</option>
                    <option value="true">Yes</option>
                </param>
                <when value="true"> <!-- Yes, train a custom probabilistic model of conformational states -->
                    <conditional name="model_type_options">
                        <param name="model_type" type="select" label="Select a model type" help="We provide two model types. KDE models are the default. They are fast to fit but may be slow in the inference in large conformational ensembles (e.g. long-timescale MD simulations). The idea of Grid models is, to replace the continuous probability density function of the kde-Model by a fixed set of grid-points. The PDF for any sample is then estimated by linear interpolation between the nearest grid points. This is slightly less accurate than the kde-Model but speeds up inference significantly.">
                            <option value="kde">KDE model</option>
                            <option value="grid">Grid model</option>
                        </param>
                        <when value="kde">
                            <param name="bandwidth" type="float" label="Which bandwidth do you want to use?" value="0.13" help="This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13)" argument="kde-bandwidth"/>
                        </when>
                        <when value="grid">
                            <param name="bandwidth" type="float" label="Which bandwidth do you want to use?" value="0.13" help="This flag controls the bandwidth of the Gaussian kernel density estimator. (default: 0.13)" argument="kde-bandwidth"/>
                            <param name="custom_model_grid_points" type="integer" label="Grid points" value="10000" help="This flag controls how many grid points are used to describe the probability density function. (default: 10000)" argument="grid-points"/>
                        </when>
                    </conditional>
                    <conditional name="custom_input_json">
                        <param name="use_custom_input_file" type="select" label="Do you want to train probability density functions with custom data?" help="If not, the default data from the publication will be used.">
                            <option value="false" selected="true">No</option>
                            <option value="true">Yes</option>
                        </param>
                        <when value="true">
                            <param name="custom_input_file" type="data" format="json" label="File for pdf fitting in JSON format" help="The data to which the new conformational state models will be fitted. It should be provided as a JSON file. The top-most key should indicate the names of the conformational states. On the level below, lists of phi-psi pairs for each state should be provided." argument="input"/>
                            <!-- Select values are emitted verbatim on the fit-model command
                                 line; the empty value (radians) adds no flag. -->
                            <param name="custom_input_file_degrees" type="select" label="Are the dihedral angles in that file in radians or degrees?" help="Indicate if the dihedral angles of the training data file are in radians or degrees." argument="degrees">
                                <option value="--degrees">Degrees</option>
                                <option value="" selected="true">Radians</option>
                            </param>
                        </when>
                        <when value="false"/>
                    </conditional>
                </when>
                <when value="false"/>
            </conditional>
        </section>
        <!-- Subsampling strategy: exactly one of sliding window or bootstrap is configured.
             The size fields are free text because several sizes may be given at once; they
             are passed unquoted to constava so that each integer becomes its own argument.
             The validator therefore only admits digits separated by single spaces, and is
             written without nested quantifiers so it cannot backtrack catastrophically. -->
        <section name="subsampling_options" title="Subsampling Options" help="Do inference using either a moving reading-frame of consecutive samples (sliding window) or samples obtained through bootstrapping (bootstrap).">
            <conditional name="sampling_options">
                <param name="subsampling_type" type="select" label="Select a subsampling method to configure" help="You must select and configure at least one subsampling option.">
                    <option value="window" selected="true">Sliding window</option>
                    <option value="bootstrap">Bootstrap sampling</option>
                </param>
                <when value="window">
                    <param name="window_size" type="text" label="Window size (space-separated integers)" value="3" help="Specify window sizes for moving frame analysis, e.g., '3 5 7'. Each reading frame consists of consecutive samples. Multiple values can be provided." argument="window">
                        <validator type="regex" message="Use one or more integers separated by single spaces.">^\d+( \d+)*$</validator>
                    </param>
                    <param name="return_window_series" type="boolean" label="Return the results for every window rather than the average." help="Return the results for every window rather than the average. This can result in very large output files." value="false" argument="window-series"/>
                </when>
                <when value="bootstrap">
                    <param name="bootstrap_size" type="text" label="Bootstrap size (space-separated integers)" value="3" help="Do inference using N samples obtained through bootstrapping. Specify bootstrap sizes, e.g., '10 20 30'. Samples obtained through bootstrapping. Multiple values can be provided." argument="bootstrap">
                        <validator type="regex" message="Use one or more integers separated by single spaces.">^\d+( \d+)*$</validator>
                    </param>
                    <param name="return_bootstrap_series" type="boolean" label="Return bootstrap series calculation" help="Return the results for every subsample rather than the average. This can result in very large output files." value="false" argument="bootstrap-series"/>
                    <param name="bootstrap_samples" type="integer" label="Bootstrap samples" value="10000" min="1" help="When bootstrapping, the number of times to sample from the input data." argument="bootstrap-samples"/>
                    <param name="bootstrap_seed" type="integer" label="Bootstrap seed" value="42" min="1" help="Set random seed for bootstrap sampling." argument="seed"/>
                </when>
            </conditional>
        </section>
    </inputs>
    <!-- Single CSV result, collected from output_constava.csv, the file name that the
         command block passes to the output option of "constava analyze". -->
    <outputs>
        <data name="file_contents" format="csv" label="${tool.name} on ${on_string}: File Contents" from_work_dir="output_constava.csv"/>
    </outputs>
    <tests>
        <!-- ======================================================================= -->
        <!-- Test scenarios: Default PDF -->
        <!-- ======================================================================= -->

        <!-- test_001: CSV dihedrals in radians, precision 10, default PDF.
             No subsampling parameters are set, so the tool defaults apply (sliding window,
             size 3). Asserts the rendered analyze command and that no model fitting runs. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="10"/>
            </section>
            <assert_command>
                <has_text text="constava analyze -vv --precision 10 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3"/>
                <not_has_text text="constava fit-model"/>
            </assert_command>
            <output name="file_contents" file="expected/test_001.csv" ftype="csv"/>
        </test>

        <!-- test_002: many XVG dihedral files in degrees, precision 5, default PDF.
             Default subsampling applies (sliding window, size 3). Asserts that every input
             file name appears on the command line, that the format, window and degrees
             options follow, and that no model fitting runs. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="xvg/ramaPhiPsiALA18.xvg,xvg/ramaPhiPsiGLY11.xvg,xvg/ramaPhiPsiLEU45.xvg,xvg/ramaPhiPsiTHR2.xvg,xvg/ramaPhiPsiALA19.xvg,xvg/ramaPhiPsiGLY22.xvg,xvg/ramaPhiPsiLYS16.xvg,xvg/ramaPhiPsiTHR4.xvg,xvg/ramaPhiPsiALA26.xvg,xvg/ramaPhiPsiGLY33.xvg,xvg/ramaPhiPsiLYS44.xvg,xvg/ramaPhiPsiTHR7.xvg,xvg/ramaPhiPsiARG29.xvg,xvg/ramaPhiPsiGLY36.xvg,xvg/ramaPhiPsiLYS48.xvg,xvg/ramaPhiPsiTRP31.xvg,xvg/ramaPhiPsiASN15.xvg,xvg/ramaPhiPsiGLY42.xvg,xvg/ramaPhiPsiLYS49.xvg,xvg/ramaPhiPsiTYR21.xvg,xvg/ramaPhiPsiASN27.xvg,xvg/ramaPhiPsiGLY50.xvg,xvg/ramaPhiPsiLYS9.xvg,xvg/ramaPhiPsiTYR5.xvg,xvg/ramaPhiPsiASN3.xvg,xvg/ramaPhiPsiILE17.xvg,xvg/ramaPhiPsiPHE40.xvg,xvg/ramaPhiPsiTYR6.xvg,xvg/ramaPhiPsiASN32.xvg,xvg/ramaPhiPsiILE34.xvg,xvg/ramaPhiPsiSER10.xvg,xvg/ramaPhiPsiVAL23.xvg,xvg/ramaPhiPsiASP12.xvg,xvg/ramaPhiPsiILE39.xvg,xvg/ramaPhiPsiSER24.xvg,xvg/ramaPhiPsiVAL25.xvg,xvg/ramaPhiPsiASP37.xvg,xvg/ramaPhiPsiILE46.xvg,xvg/ramaPhiPsiSER30.xvg,xvg/ramaPhiPsiVAL41.xvg,xvg/ramaPhiPsiGLN20.xvg,xvg/ramaPhiPsiLEU14.xvg,xvg/ramaPhiPsiSER35.xvg,xvg/ramaPhiPsiVAL47.xvg,xvg/ramaPhiPsiGLN43.xvg,xvg/ramaPhiPsiLEU28.xvg,xvg/ramaPhiPsiSER51.xvg,xvg/ramaPhiPsiVAL8.xvg,xvg/ramaPhiPsiGLY1.xvg,xvg/ramaPhiPsiLEU38.xvg,xvg/ramaPhiPsiTHR13.xvg" ftype="xvg"/>
                <param name="input_degrees" value="--degrees"/>
                <param name="input_precision" value="5"/>
            </section>
            <assert_command>
                <has_text text="constava analyze -vv --precision 5 --input"/>
                <has_text text="ramaPhiPsiALA18.xvg"/>
                <has_text text="ramaPhiPsiGLY11.xvg"/>
                <has_text text="ramaPhiPsiLEU45.xvg"/>
                <has_text text="ramaPhiPsiTHR2.xvg"/>
                <has_text text="ramaPhiPsiALA19.xvg"/>
                <has_text text="ramaPhiPsiGLY22.xvg"/>
                <has_text text="ramaPhiPsiLYS16.xvg"/>
                <has_text text="ramaPhiPsiTHR4.xvg"/>
                <has_text text="ramaPhiPsiALA26.xvg"/>
                <has_text text="ramaPhiPsiGLY33.xvg"/>
                <has_text text="ramaPhiPsiLYS44.xvg"/>
                <has_text text="ramaPhiPsiTHR7.xvg"/>
                <has_text text="ramaPhiPsiARG29.xvg"/>
                <has_text text="ramaPhiPsiGLY36.xvg"/>
                <has_text text="ramaPhiPsiLYS48.xvg"/>
                <has_text text="ramaPhiPsiTRP31.xvg"/>
                <has_text text="ramaPhiPsiASN15.xvg"/>
                <has_text text="ramaPhiPsiGLY42.xvg"/>
                <has_text text="ramaPhiPsiLYS49.xvg"/>
                <has_text text="ramaPhiPsiTYR21.xvg"/>
                <has_text text="ramaPhiPsiASN27.xvg"/>
                <has_text text="ramaPhiPsiGLY50.xvg"/>
                <has_text text="ramaPhiPsiLYS9.xvg"/>
                <has_text text="ramaPhiPsiTYR5.xvg"/>
                <has_text text="ramaPhiPsiASN3.xvg"/>
                <has_text text="ramaPhiPsiILE17.xvg"/>
                <has_text text="ramaPhiPsiPHE40.xvg"/>
                <has_text text="ramaPhiPsiTYR6.xvg"/>
                <has_text text="ramaPhiPsiASN32.xvg"/>
                <has_text text="ramaPhiPsiILE34.xvg"/>
                <has_text text="ramaPhiPsiSER10.xvg"/>
                <has_text text="ramaPhiPsiVAL23.xvg"/>
                <has_text text="ramaPhiPsiASP12.xvg"/>
                <has_text text="ramaPhiPsiILE39.xvg"/>
                <has_text text="ramaPhiPsiSER24.xvg"/>
                <has_text text="ramaPhiPsiVAL25.xvg"/>
                <has_text text="ramaPhiPsiASP37.xvg"/>
                <has_text text="ramaPhiPsiILE46.xvg"/>
                <has_text text="ramaPhiPsiSER30.xvg"/>
                <has_text text="ramaPhiPsiVAL41.xvg"/>
                <has_text text="ramaPhiPsiGLN20.xvg"/>
                <has_text text="ramaPhiPsiLEU14.xvg"/>
                <has_text text="ramaPhiPsiSER35.xvg"/>
                <has_text text="ramaPhiPsiVAL47.xvg"/>
                <has_text text="ramaPhiPsiGLN43.xvg"/>
                <has_text text="ramaPhiPsiLEU28.xvg"/>
                <has_text text="ramaPhiPsiSER51.xvg"/>
                <has_text text="ramaPhiPsiVAL8.xvg"/>
                <has_text text="ramaPhiPsiGLY1.xvg"/>
                <has_text text="ramaPhiPsiLEU38.xvg"/>
                <has_text text="ramaPhiPsiTHR13.xvg"/>
                <has_text text="--input-format xvg --output output_constava.csv --window 3 --degrees"/>
                <not_has_text text="constava fit-model"/>
            </assert_command>
            <output name="file_contents" file="expected/test_002.csv" ftype="csv"/>
        </test>

        <!-- ======================================================================= -->
        <!-- Test scenarios: Custom PDF with default training data -->
        <!-- ======================================================================= -->

        <!-- test_003: custom KDE model (bandwidth 0.15) fitted on the default training data,
             then applied to CSV dihedrals in radians with the default sliding window.
             Asserts both the fit-model command and the analyze command that loads the
             resulting custom_model.kde.pkl. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_003.csv" ftype="csv"/>
        </test>

        <!-- test_004: custom grid model (1500 grid points, bandwidth 0.15) fitted on the
             default training data, then applied to CSV dihedrals in radians with the default
             sliding window. Asserts both the fit-model command and the analyze command that
             loads the resulting custom_model.grid.pkl. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="grid"/>
                        <param name="bandwidth" value="0.15"/>
                        <param name="custom_model_grid_points" value="1500"/>
                    </conditional>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava fit-model -vv --model-type grid --grid-points 1500 --kde-bandwidth 0.15 --output custom_model.grid.pkl"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.grid.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_004.csv" ftype="csv"/>
        </test>

        <!-- ======================================================================= -->
        <!-- Test scenarios: Custom PDF with custom training data in JSON -->
        <!-- ======================================================================= -->

        <!-- test_005: custom KDE model (bandwidth 0.15) fitted on a user-supplied JSON
             training file. The training file's angle unit is not set, so its default
             (radians, no flag) applies. Asserts that the JSON file name is passed to
             fit-model and that analyze loads custom_model.kde.pkl. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                    <conditional name="custom_input_json">
                        <param name="use_custom_input_file" value="true"/>
                        <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
                    </conditional>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_005.csv" ftype="csv"/>
        </test>

        <!-- test_006: custom grid model (1500 grid points, bandwidth 0.15) fitted on a
             user-supplied JSON training file. The training file's angle unit is not set, so
             its default (radians, no flag) applies. Asserts that the JSON file name is passed
             to fit-model and that analyze loads custom_model.grid.pkl. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="grid"/>
                        <param name="bandwidth" value="0.15"/>
                        <param name="custom_model_grid_points" value="1500"/>
                    </conditional>
                    <conditional name="custom_input_json">
                        <param name="use_custom_input_file" value="true"/>
                        <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
                    </conditional>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava fit-model -vv --model-type grid --grid-points 1500 --kde-bandwidth 0.15 --output custom_model.grid.pkl --input constava_csdata.mini.json"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 --load-model custom_model.grid.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_006.csv" ftype="csv"/>
        </test>
        <!-- ======================================================================= -->
        <!-- Test scenarios: Default PDF with bootstrap -->
        <!-- ======================================================================= -->

        <!-- test_007: CSV dihedrals in radians with bootstrap subsampling (size 3,
             10 samples, seed 89), averaged results, default PDF.
             Asserts the bootstrap options on the analyze command and that no model
             fitting runs. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="false"/>
                </conditional>
            </section>
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="3"/>
                    <param name="return_bootstrap_series" value="false"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="89"/>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89"/>
                <not_has_text text="constava fit-model"/>
            </assert_command>
            <output name="file_contents" file="expected/test_007.csv" ftype="csv"/>
        </test>

        <!-- test_008: same bootstrap setup as test_007 (size 3, 10 samples, seed 89) but with
             the per-subsample series requested instead of the average, default PDF.
             Asserts that the series variant of the bootstrap option is emitted and that no
             model fitting runs. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="false"/>
                </conditional>
            </section>
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="3"/>
                    <param name="return_bootstrap_series" value="true"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="89"/>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89"/>
                <not_has_text text="constava fit-model"/>
            </assert_command>
            <output name="file_contents" file="expected/test_008.csv" ftype="csv"/>
        </test>

        <!-- ======================================================================= -->
        <!-- Test scenarios: Custom PDF with bootstrap -->
        <!-- ======================================================================= -->

        <!-- test_009: CSV dihedrals in radians with bootstrap subsampling (size 3,
             10 samples, seed 89) using a custom KDE model (bandwidth 0.15) fitted on the
             default training data. Asserts the fit-model command and that analyze combines
             the bootstrap options with loading custom_model.kde.pkl. -->
        <test expect_num_outputs="1">
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                </conditional>
            </section>
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="3"/>
                    <param name="return_bootstrap_series" value="false"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="89"/>
                </conditional>
            </section>
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_009.csv" ftype="csv"/>
        </test>

        <!-- test_010: CSV dihedrals in radians, bootstrap subsampling, custom PDF (kde) trained on a custom JSON data set -->
        <test expect_num_outputs="1">
            <!-- Empty input_degrees keeps radians (the command block echoes "empty means Radians");
                 input_precision=5 is expected to show up as the precision option of "constava analyze" -->
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <!-- Custom model enabled: kde with bandwidth 0.15, fitted on the supplied training JSON -->
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                    <conditional name="custom_input_json">
                        <param name="use_custom_input_file" value="true"/>
                        <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
                    </conditional>
                </conditional>
            </section>
            <!-- return_bootstrap_series=false: size 3 is expected under the plain "bootstrap" option,
                 together with the "bootstrap-samples" (10) and "seed" (89) options -->
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="3"/>
                    <param name="return_bootstrap_series" value="false"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="89"/>
                </conditional>
            </section>
            <!-- "fit-model" is expected to receive the training file by its element name (no directory part),
                 and "analyze" is expected to load the resulting custom_model.kde.pkl -->
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_010.csv" ftype="csv"/>
        </test>

        <!-- test_011: CSV dihedrals in radians, bootstrap series, custom PDF (kde) trained on the default data -->
        <test expect_num_outputs="1">
            <!-- Empty input_degrees keeps radians (the command block echoes "empty means Radians");
                 input_precision=5 is expected to show up as the precision option of "constava analyze" -->
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <!-- Custom model enabled: kde with bandwidth 0.15; no custom training file is supplied in this test -->
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                </conditional>
            </section>
            <!-- return_bootstrap_series=true: size 3 is expected under the "bootstrap-series" option
                 instead of the plain "bootstrap" option -->
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="3"/>
                    <param name="return_bootstrap_series" value="true"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="89"/>
                </conditional>
            </section>
            <!-- Two commands are expected: "fit-model" writes custom_model.kde.pkl, then "analyze" loads it.
                 NOTE(review): has_text presumably matches substrings, so the first assertion would also pass
                 if an input option followed the output file name; confirm whether that should be ruled out -->
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_011.csv" ftype="csv"/>
        </test>

        <!-- test_012: CSV dihedrals in radians, bootstrap series, custom PDF (kde) trained on a custom JSON data set -->
        <test expect_num_outputs="1">
            <!-- Empty input_degrees keeps radians (the command block echoes "empty means Radians");
                 input_precision=5 is expected to show up as the precision option of "constava analyze" -->
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <!-- Custom model enabled: kde with bandwidth 0.15, fitted on the supplied training JSON -->
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                    <conditional name="custom_input_json">
                        <param name="use_custom_input_file" value="true"/>
                        <param name="custom_input_file" value="custom-training-data/constava_csdata.mini.json"/>
                    </conditional>
                </conditional>
            </section>
            <!-- return_bootstrap_series=true: size 3 is expected under the "bootstrap-series" option
                 instead of the plain "bootstrap" option -->
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="3"/>
                    <param name="return_bootstrap_series" value="true"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="89"/>
                </conditional>
            </section>
            <!-- "fit-model" is expected to receive the training file by its element name (no directory part),
                 and "analyze" is expected to load the resulting custom_model.kde.pkl -->
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl --input constava_csdata.mini.json"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap-series 3 --bootstrap-samples 10 --seed 89 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_012.csv" ftype="csv"/>
        </test>

        <!-- test_013: Testing the bootstrap validators: a space-separated list of bootstrap sizes ("10 20")
             is expected to be accepted and passed to the command line unchanged -->
        <test expect_num_outputs="1">
            <!-- Empty input_degrees keeps radians (the command block echoes "empty means Radians") -->
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <!-- Custom model enabled: kde with bandwidth 0.15; no custom training file is supplied in this test -->
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                </conditional>
            </section>
            <!-- Two sizes in a single value; this test uses seed 18 where the other bootstrap tests in this section use 89 -->
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="bootstrap"/>
                    <param name="bootstrap_size" value="10 20"/>
                    <param name="return_bootstrap_series" value="false"/>
                    <param name="bootstrap_samples" value="10"/>
                    <param name="bootstrap_seed" value="18"/>
                </conditional>
            </section>
            <!-- Both sizes are expected after the plain "bootstrap" option, separated by a single space -->
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --bootstrap 10 20 --bootstrap-samples 10 --seed 18 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_013.csv" ftype="csv"/>
        </test>
        <!-- test_014: Testing the window validators: a space-separated list of window sizes ("3 5 7")
             is expected to be accepted and passed to the command line unchanged -->
        <test expect_num_outputs="1">
            <!-- Empty input_degrees keeps radians (the command block echoes "empty means Radians") -->
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="5"/>
            </section>
            <!-- Custom model enabled: kde with bandwidth 0.15; no custom training file is supplied in this test -->
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                </conditional>
            </section>
            <!-- return_window_series is not set here, so the tool default applies;
                 the assertion below expects the plain "window" option -->
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="window"/>
                    <param name="window_size" value="3 5 7"/>
                </conditional>
            </section>
            <!-- No bootstrap-related options are expected between the window sizes and the load-model option -->
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
                <has_text text="constava analyze -vv --precision 5 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window 3 5 7 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_014.csv" ftype="csv"/>
        </test>
        <!-- test_015: Testing the window series validators: a space-separated list of window sizes ("5 7 9 11")
             with return_window_series enabled is expected to be passed under the "window-series" option -->
        <test expect_num_outputs="1">
            <!-- Unlike the other tests in this section, the output precision is 3 instead of 5 -->
            <section name="input_options">
                <param name="input_files" value="csv/dihedrals.mini.csv"/>
                <param name="input_degrees" value=""/>
                <param name="input_precision" value="3"/>
            </section>
            <!-- Custom model enabled: kde with bandwidth 0.15; no custom training file is supplied in this test -->
            <section name="conformational_state_model_options">
                <conditional name="custom_model">
                    <param name="use_custom_model" value="true"/>
                    <conditional name="model_type_options">
                        <param name="model_type" value="kde"/>
                        <param name="bandwidth" value="0.15"/>
                    </conditional>
                </conditional>
            </section>
            <!-- return_window_series=true switches the expected option from "window" to "window-series" -->
            <section name="subsampling_options">
                <conditional name="sampling_options">
                    <param name="subsampling_type" value="window"/>
                    <param name="window_size" value="5 7 9 11"/>
                    <param name="return_window_series" value="true"/>
                </conditional>
            </section>
            <!-- The analyze command is expected to carry precision 3 and all four window sizes -->
            <assert_command>
                <has_text text="constava fit-model -vv --model-type kde --kde-bandwidth 0.15 --output custom_model.kde.pkl"/>
                <has_text text="constava analyze -vv --precision 3 --input dihedrals.mini.csv --input-format csv --output output_constava.csv --window-series 5 7 9 11 --load-model custom_model.kde.pkl"/>
            </assert_command>
            <output name="file_contents" file="expected/test_015.csv" ftype="csv"/>
        </test>
    </tests>
    <!-- People and organization credited for this tool. Each "identifier" is the ORCID iD that also
         appears in the matching "url" attribute. -->
    <creator>
        <person name="José Gavalda-Garcia" honorificPrefix="Dr" identifier="0000-0001-6431-3442" url="https://orcid.org/0000-0001-6431-3442"/>
        <person name="David Bickel" honorificPrefix="Dr" identifier="0000-0003-0332-8338" url="https://orcid.org/0000-0003-0332-8338"/>
        <person name="Joel Roca-Martinez" honorificPrefix="Dr" identifier="0000-0002-4313-3845" url="https://orcid.org/0000-0002-4313-3845"/>
        <person name="Daniele Raimondi" honorificPrefix="Dr" identifier="0000-0003-1157-1899" url="https://orcid.org/0000-0003-1157-1899"/>
        <person name="Gabriele Orlando" honorificPrefix="Dr" identifier="0000-0002-5935-5258" url="https://orcid.org/0000-0002-5935-5258"/>
        <person name="Wim F. Vranken" honorificPrefix="Dr" email="wim.vranken@vub.be" identifier="0000-0001-7470-4324" url="https://orcid.org/0000-0001-7470-4324"/>
        <person name="Iman Jouiad"/>
        <person name="Boris Depoortere" email="boris.depoortere@vib.be" identifier="0009-0002-2539-116X" url="https://orcid.org/0009-0002-2539-116X"/>
        <person name="Adrián Díaz" email="adrian.diaz@vub.be" identifier="0000-0003-0165-1318" url="https://orcid.org/0000-0003-0165-1318"/>
        <!-- The organization url points at the group's site rather than at the page of another tool -->
        <organization name="Bio2Byte, Vrije Universiteit Brussel (VUB)"
                      address="Interuniversity Institute Bioinformatics Brussels, Université Libre de Bruxelles, 1050 Ixelles, Brussels, Belgium"
                      url="https://bio2byte.be"
                      email="bio2byte@vub.be"
                      image="https://0.gravatar.com/avatar/2b51fb7600d876086669bcc85a941b763a81d1c2bb3c667b8c83a1aa892cf740"/>
    </creator>
    <help><![CDATA[
    `Constava <https://pypi.org/project/constava/>`_ analyzes conformational ensembles to calculate **conformational state propensities**
    and **conformational state variability**.

    **Conformational state propensities** describe how likely each residue is to occupy a given conformational state,
    whereas **conformational state variability** measures the residue's ability to transition between conformational states.

    Each conformational state is represented by a statistical model derived from the backbone dihedral angles (φ, ψ).
    The default models were obtained from an analysis of NMR ensembles and chemical shifts.
    To perform an analysis, you must provide the φ- and ψ-angles of each residue for every conformation in the ensemble.

    The conformational states were defined according to residue behavior across NMR ensembles:

    - **Core helix** (column ``coreHelix``): Residues that exclusively adopt a helical conformation in all models of their associated ensemble, with shiftCrypt values ≤ 0.2 (N = 93,957 residues).
    - **Surrounding helix** (column ``surrHelix``): Residues that adopt a helical conformation in the majority of models, with shiftCrypt values in the range (0.2, 0.4] (N = 8,180 residues).
    - **Core sheet** (column ``coreSheet``): Residues that exclusively adopt an extended conformation in all models, with shiftCrypt values ≥ 0.8 (N = 47,280 residues).
    - **Surrounding sheet** (column ``surrSheet``): Residues that adopt an extended conformation in most models, with shiftCrypt values in the range [0.6, 0.8) (N = 11,280 residues).
    - **Turn** (column ``Turn``): Residues that adopt a turn conformation in most models, with shiftCrypt values in the range (0.4, 0.6) (N = 75,377 residues).
    - **Other** (column ``Other``): Residues that adopt a coil conformation in most models, also with shiftCrypt values in the range (0.4, 0.6) (N = 74,542 residues).

    **Input Data and Parameters**

    *Constava* requires backbone dihedral angles extracted from the conformational ensemble as input data.
    These angles can be generated with *GROMACS* using the ``gmx chi`` module (set the input format to ``xvg``),
    or they can be obtained using the Python submodule ``constava dihedrals``, which supports a wide range of molecular dynamics and structure formats.

    - **Input files:** Provide files containing the dihedral angles. Supported formats include CSV and XVG.
    - **Angle units:** Specify whether the dihedral angles in your files are expressed in radians or degrees.

    .. class:: infomark

    **Example files:** Example datasets in both formats are available in the
    `data directory on GitHub <https://github.com/Bio2Byte/constava/tree/main/constava/data>`_.

    The ``constava dihedrals`` submodule extracts backbone dihedral angles from conformational ensembles.
    By default, it outputs the results in radians, which is the preferred format for ``constava analyze``.

    **Kernel Options**

    Configure the probability density functions (PDFs) used in the analysis. You can choose predefined PDFs
    or fit custom ones from your own data.

    **Subsampling Options**

    You can apply different subsampling strategies, such as window-based analysis or bootstrap sampling,
    to assess data variability and statistical robustness.

    - **Window size:** Define the frame size for moving-window analyses (multiple consecutive samples).
    - **Bootstrap size:** Specify how many conformations are drawn for each bootstrap sample; the number of bootstrap samples is a separate setting.

    **Results and Output Files**

    *Constava* produces an output file containing the calculated variability and propensity measures.
    The file format and level of detail depend on your configuration and the selected subsampling options.

    - **Output precision:** Set the number of decimal places to include in the output file.

    .. class:: warningmark

    **Note:** Accurate results require careful parameter selection. Default settings are provided for convenience,
    but they may need adjustment depending on your dataset and analysis goals.

    **Project Links**

    This tool uses the Python package ``constava``, available via PyPI and BioConda.

    - Source code repository on `GitHub <https://github.com/Bio2Byte/constava>`_
    - Python package on `PyPI <https://pypi.org/project/constava/>`_
    - Conda recipe on `BioConda <https://bioconda.github.io/recipes/constava/README.html>`_
    - Conda package on `Anaconda (BioConda channel) <https://anaconda.org/bioconda/constava>`_
    - Tool profile on `Bio.Tools <https://bio.tools/constava>`_
    ]]></help>
    <citations>
        <!-- NOTE(review): the DOI prefix suggests NAR Genomics and Bioinformatics; confirm this is the Constava method paper -->
        <citation type="doi">10.1093/nargab/lqae082</citation>
        <!-- NOTE(review): the DOI prefix suggests Journal of Molecular Biology; confirm how this paper relates to the tool -->
        <citation type="doi">10.1016/j.jmb.2024.168900</citation>
    </citations>
</tool>