<!-- Mercurial > repos > iuc > rrmscorer -->

<tool id="rrmscorer" name="RRM-Scorer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Predicts RNA Recognition Motif (RRM) scores</description>
    <!-- Shared definitions are imported from macros.xml. The @TOOL_VERSION@, @VERSION_SUFFIX@ and
         @PROFILE@ tokens used on the tool element are presumably defined there (TODO confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference linking this wrapper to its bio.tools entry. -->
    <xrefs>
        <xref type="bio.tools">RRMScorer</xref>
    </xrefs>
    <!-- Expands the "requirements" macro; it is not defined in this file (presumably in macros.xml). -->
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Short aliases for the nested parameter groups used below.
        #set $seq_src = $input_sequence.input_type_cond
        #set $tool_opts = $tool_parameters
        #set $out_opts = $output_options

        ## Create every directory that is handed to rrmscorer as an output location.
        mkdir -p json tabular aligned plots &&
        rrmscorer
            --json 'json'
            --csv 'tabular'
            --window_size $tool_opts.window_size

        ## Protein source: a FASTA dataset or a UniProt identifier.
        #if str($seq_src.input_type) == 'fasta'
            --fasta '$seq_src.fasta_input'
        #elif str($seq_src.input_type) == 'uniprot'
            --uniprot '$seq_src.uniprot_id'
        #end if

        ## Optional outputs requested by the user.
        #if $out_opts.generate_plots
            --plot 'plots'
        #end if
        #if $out_opts.generate_fasta
            --aligned 'aligned'
        #end if

        ## The top-scoring search runs when it is requested explicitly or when no
        ## target RNA was supplied; otherwise the supplied target RNA is scored.
        #if $out_opts.top_scoring_rna or not $tool_opts.target
            --top
        #else
            --rna '$tool_opts.target'
        #end if
        ]]></command>
    <inputs>
        <!-- Protein source: either a FASTA dataset or a UniProt identifier. -->
        <section name="input_sequence" title="Input sequence" expanded="true">
            <conditional name="input_type_cond">
                <param name="input_type"
                       type="select"
                       optional="false"
                       label="Select the type of sequence input">
                    <option value="fasta" selected="true">FASTA file containing the protein sequence(s)</option>
                    <option value="uniprot">Protein identifier from UniProt</option>
                </param>
                <when value="fasta">
                    <param name="fasta_input"
                           type="data"
                           format="fasta"
                           multiple="false"
                           optional="false"
                           label="Protein sequence(s) in FASTA format"
                           help="Provide a FASTA file containing the protein sequences.">
                        <validator type="dataset_ok_validator"/>
                    </param>
                </when>
                <when value="uniprot">
                    <param name="uniprot_id"
                           type="text"
                           value=""
                           optional="false"
                           label="Protein identifier from UniProt"
                           help="Provide a UniProt ID (e.g. P19339).">
                        <validator type="empty_field" message="Missing UniProt ID"/>
                    </param>
                </when>
            </conditional>
        </section>
        <!-- Prediction settings: optional target RNA and the scoring window size. -->
        <section name="tool_parameters"
                 title="Tool parameters"
                 help="Configure this section to select the predictions to be executed">
            <!-- The regex accepts an empty value or any run of A/U/G/C in either case. -->
            <param name="target"
                   type="text"
                   optional="true"
                   label="Target RNA sequence (min. 5 nucleotides)"
                   help="Provide a valid target RNA sequence. Leave blank if not specifying a target RNA so that the predictor will use the top-scoring RNA by default.">
                <validator type="regex" message="The sequence must consist only of RNA nucleotides (A, U, G, C) or be left blank.">^([AUGCaugc]+)?$</validator>
            </param>
            <param name="window_size"
                   type="select"
                   label="The window size to test">
                <option value="5" selected="true">5</option>
                <option value="3">3</option>
            </param>
        </section>
        <!-- Switches selecting which optional outputs are produced. -->
        <section name="output_options"
                 title="Output parameters"
                 help="Configure this section to define the tool output files">
            <param name="generate_plots"
                   type="boolean"
                   label="Generate score plots for all the RNA possible windows"
                   help="Enable to generate score plots."/>
            <param name="top_scoring_rna"
                   type="boolean"
                   label="Find the top-scoring RNA for the specified RRM(s)"
                   help="Enable to find and plot the top-scoring RNA. Attention: This option overrides the target RNA if present."/>
            <param name="generate_fasta"
                   type="boolean"
                   label="Generate a FASTA file for each input sequence aligned to the HMM"
                   help="Enable to generate a FASTA file with aligned sequences."/>
        </section>
    </inputs>
    <outputs>
        <!-- Each list collection is filled from the files found in one working
             sub-directory; element name and datatype come from the file name. -->
        <collection name="split_csv" type="list" label="Tabular predictions by sequence">
            <discover_datasets directory="tabular" pattern="__designation_and_ext__" visible="true"/>
        </collection>
        <collection name="split_json" type="list" label="Json predictions by sequence">
            <discover_datasets directory="json" pattern="__designation_and_ext__" visible="true"/>
        </collection>
        <collection name="split_aligned" type="list" label="Alignment in FASTA format by sequence">
            <discover_datasets directory="aligned" pattern="__designation_and_ext__" visible="true"/>
        </collection>
        <collection name="split_plots" type="list" label="Plots by sequence">
            <discover_datasets directory="plots" pattern="__designation_and_ext__" visible="true"/>
        </collection>
    </outputs>
    <tests>
        <!-- Test 1: UniProt ID P19339, window size 5, target RNA AUGGCU, all output options at their defaults -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <!-- No output option is enabled, so none of the optional flags may appear. -->
                <not_has_text text="--top"/>
                <not_has_text text="--plot"/>
                <not_has_text text="--aligned"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="2"/>
            <output_collection name="split_aligned" count="0"/>
            <output_collection name="split_plots" count="0"/>
        </test>
        <!-- Test 2: UniProt ID P19339, window size 3, target RNA AUGGCU, all output options at their defaults -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="3"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 3"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <!-- No output option is enabled, so none of the optional flags may appear. -->
                <not_has_text text="--top"/>
                <not_has_text text="--plot"/>
                <not_has_text text="--aligned"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="2"/>
            <output_collection name="split_aligned" count="0"/>
            <output_collection name="split_plots" count="0"/>
        </test>
        <!-- Test 3: UniProt ID P19339, window size 5, target RNA AUGGCU, aligned FASTA files -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_fasta" value="true"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--aligned 'aligned'"/>
                <!-- Only the aligned output is enabled: the top and plot flags must be absent. -->
                <not_has_text text="--top"/>
                <not_has_text text="--plot"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="2"/>
            <output_collection name="split_aligned" count="2"/>
            <output_collection name="split_plots" count="0"/>
        </test>
        <!-- Test 4: UniProt ID P19339, window size 5, top-scoring RNA, with plots -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="top_scoring_rna" value="true"/>
                <param name="generate_plots" value="true"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <!-- Plots are enabled in this test, so the plot option has to be on the command line. -->
                <has_text text="--plot 'plots'"/>
                <has_text text="--top"/>
                <!-- No target RNA and no aligned output were requested. -->
                <not_has_text text="--rna"/>
                <not_has_text text="--aligned"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="0"/>
            <output_collection name="split_aligned" count="0"/>
            <output_collection name="split_plots" count="8"/>
        </test>
        <!-- Test 5: UniProt ID P19339, window size 5, top-scoring RNA, with plots, aligned FASTA files -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="top_scoring_rna" value="true"/>
                <param name="generate_plots" value="true"/>
                <param name="generate_fasta" value="true"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--plot 'plots'"/>
                <has_text text="--aligned 'aligned'"/>
                <has_text text="--top"/>
            </assert_command>
            <!-- All four declared collections are checked; the tabular one is expected
                 to stay empty in top-scoring mode, as in the other top-scoring tests. -->
            <output_collection name="split_csv" type="list" count="0"/>
            <output_collection name="split_json" type="list" count="2"/>
            <output_collection name="split_aligned" type="list" count="2"/>
            <output_collection name="split_plots" type="list" count="8"/>
        </test>
        <!-- Test 6: UniProt ID P19339, window size 5, target RNA AUGGCU, with plots -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_plots" value="true"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <!-- Plots are enabled in this test, so the plot option has to be on the command line. -->
                <has_text text="--plot 'plots'"/>
                <!-- A target RNA is given and aligned output is off. -->
                <not_has_text text="--top"/>
                <not_has_text text="--aligned"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="2"/>
            <output_collection name="split_aligned" count="0"/>
            <output_collection name="split_plots" count="2"/>
        </test>
        <!-- Test 7: UniProt ID P19339, window size 5, target RNA AUGGCU, with plots, aligned FASTA files -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P19339"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="top_scoring_rna" value="false"/>
                <param name="generate_fasta" value="true"/>
                <param name="generate_plots" value="true"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P19339'"/>
                <!-- The target RNA and the aligned output are both requested by this
                     test, so both options are asserted explicitly. -->
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--plot 'plots'"/>
                <has_text text="--aligned 'aligned'"/>
                <not_has_text text="--top"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="2"/>
            <output_collection name="split_aligned" count="2"/>
            <output_collection name="split_plots" count="2"/>
        </test>
        <!-- Test 8: FASTA file, window size 5, target RNA AUGGCU, aligned FASTA files (top-scoring option left at its default) -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="fasta"/>
                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_fasta" value="true"/>
            </section>
            <assert_command>
                <has_text text="--fasta"/>
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <!-- Aligned output is enabled in this test, so its option has to be on the command line. -->
                <has_text text="--aligned 'aligned'"/>
                <!-- A target RNA is given and plots are off. -->
                <not_has_text text="--top"/>
                <not_has_text text="--plot"/>
            </assert_command>
            <output_collection name="split_json" count="2"/>
            <output_collection name="split_csv" count="2"/>
            <output_collection name="split_aligned" count="2"/>
            <output_collection name="split_plots" type="list" count="0"/>
        </test>
        <!-- Test 9: FASTA input, window size 5, top-scoring RNA search, aligned FASTA output -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="fasta"/>
                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_fasta" value="true"/>
                <param name="top_scoring_rna" value="true"/>
            </section>
            <!-- Assertions are listed in the order the options appear in the command template. -->
            <assert_command>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--fasta"/>
                <has_text text="--aligned 'aligned'"/>
                <has_text text="--top"/>
                <not_has_text text="--rna 'AUGGCU'"/>
            </assert_command>
            <output_collection name="split_json" type="list" count="2"/>
            <output_collection name="split_csv" type="list" count="0"/>
            <output_collection name="split_aligned" count="2"/>
            <output_collection name="split_plots" type="list" count="0"/>
        </test>
        <!-- Test 10: FASTA input, window size 5, top-scoring RNA search, plots and aligned FASTA output -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="fasta"/>
                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_plots" value="true"/>
                <param name="generate_fasta" value="true"/>
                <param name="top_scoring_rna" value="true"/>
            </section>
            <!-- Assertions are listed in the order the options appear in the command template. -->
            <assert_command>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--fasta"/>
                <has_text text="--plot 'plots'"/>
                <has_text text="--aligned 'aligned'"/>
                <has_text text="--top"/>
                <not_has_text text="--rna 'AUGGCU'"/>
            </assert_command>
            <output_collection name="split_json" type="list" count="2"/>
            <output_collection name="split_csv" type="list" count="0"/>
            <output_collection name="split_aligned" type="list" count="2"/>
            <output_collection name="split_plots" type="list" count="8"/>
        </test>
        <!-- Test 11: FASTA input, window size 5, target RNA AUGGCU with the top-scoring option explicitly off, aligned FASTA output -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="fasta"/>
                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_fasta" value="true"/>
                <param name="top_scoring_rna" value="false"/>
            </section>
            <!-- Assertions are listed in the order the options appear in the command template. -->
            <assert_command>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--fasta"/>
                <has_text text="--aligned 'aligned'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <not_has_text text="--top"/>
            </assert_command>
            <output_collection name="split_json" type="list" count="2"/>
            <output_collection name="split_csv" type="list" count="2"/>
            <output_collection name="split_aligned" count="2"/>
            <output_collection name="split_plots" type="list" count="0"/>
        </test>
        <!-- Test 12: FASTA input, window size 5, target RNA AUGGCU, plots and aligned FASTA output -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="fasta"/>
                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="generate_plots" value="true"/>
                <param name="generate_fasta" value="true"/>
                <param name="top_scoring_rna" value="false"/>
            </section>
            <!-- Assertions are listed in the order the options appear in the command template. -->
            <assert_command>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--fasta"/>
                <has_text text="--plot 'plots'"/>
                <has_text text="--aligned 'aligned'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <not_has_text text="--top"/>
            </assert_command>
            <output_collection name="split_json" type="list" count="2"/>
            <output_collection name="split_csv" type="list" count="2"/>
            <output_collection name="split_aligned" count="2"/>
            <output_collection name="split_plots" type="list" count="2"/>
        </test>
        <!-- Test 13: Non-RRM protein (UniProt ID P05067); every output collection is expected to be empty -->
        <!-- The output count is pinned to the four declared collections, matching all other tests. -->
        <test expect_num_outputs="4">
            <section name="input_sequence">
                <conditional name="input_type_cond">
                    <param name="input_type" value="uniprot"/>
                    <param name="uniprot_id" value="P05067"/>
                </conditional>
            </section>
            <section name="tool_parameters">
                <param name="target" value="AUGGCU"/>
                <param name="window_size" value="5"/>
            </section>
            <section name="output_options">
                <param name="top_scoring_rna" value="false"/>
                <param name="generate_plots" value="false"/>
                <param name="generate_fasta" value="false"/>
            </section>
            <assert_command>
                <has_text text="--uniprot 'P05067'"/>
                <has_text text="--rna 'AUGGCU'"/>
                <has_text text="--window_size 5"/>
                <has_text text="--json 'json'"/>
                <has_text text="--csv 'tabular'"/>
            </assert_command>
            <output_collection name="split_csv" type="list" count="0"/>
            <output_collection name="split_json" type="list" count="0"/>
            <output_collection name="split_plots" type="list" count="0"/>
            <output_collection name="split_aligned" type="list" count="0"/>
        </test>
    </tests>
    <!-- Attribution metadata for the group that develops RRMScorer. -->
    <creator>
        <organization name="Bio2Byte, Vrije Universiteit Brussel (VUB)"
                      url="https://bio2byte.be/rrmscorer"
                      email="bio2byte@vub.be"
                      address="Interuniversity Institute Bioinformatics Brussels, Université Libre de Bruxelles, 1050 Ixelles, Brussels, Belgium"
                      image="https://0.gravatar.com/avatar/2b51fb7600d876086669bcc85a941b763a81d1c2bb3c667b8c83a1aa892cf740"/>
    </creator>
    <help><![CDATA[
    This tool allows you to predict RNA Recognition Motif (RRM) scores for protein
    sequences provided in *FASTA* format or as *UniProt IDs*.

    **RRMScorer** is designed to predict RNA binding preferences for proteins containing
    RNA recognition motifs (RRMs), the most prevalent RNA binding domain in eukaryotes.

    **Abstract:**

    By carefully analysing a dataset of 187 RRM-RNA structural complexes, we calculated
    residue-level binding scores using a probabilistic model derived from
    amino acid-nucleotide interaction propensities, which are the basis of
    **RRMScorer**.

    With its ability to provide residue-level insights and accurate predictions,
    **RRMScorer** serves as a valuable tool for researchers exploring the functional
    landscape of RRM-RNA interactions.

    **Methodology**:

    The input sequence is scanned against our RRMScorer hidden Markov model (HMM)
    to (i) identify whether the input sequence contains any RRM domain and (ii)
    map to the 20 positions in the RRM protein sequence alignment that we use to compute
    the RNA binding scores.

    If one or more RRM domains are identified in the input sequence, RRMScorer computes
    the score of the user-defined RNA sequence or, if none is given, the scores for all
    1024 possible RNA sequences with a length of 5 nt. By providing a specific
    RNA sequence, the user can inspect which 5-nt windows the RRM is more likely
    to bind.

    **Input fields:**

    * **Protein sequence(s) in FASTA format:** Provide a *FASTA* file containing the protein sequences.
    * **Protein identifier from UniProt:** Provide a *UniProt ID* (e.g. P19339).
    * **Target RNA sequence:** Provide a valid target RNA sequence (minimum 5 nucleotides). Leave blank to use the top-scoring RNA by default.
    * **The window size to test:** Select the window size (either 3 or 5 nucleotides).
    * **Generate score plots:** Enable to generate score plots for all the possible RNA windows.
    * **Find the top-scoring RNA:** Enable to find and plot the top-scoring RNA. This option overrides the target RNA if present.
    * **Generate a FASTA file:** Enable to generate a FASTA file for each input sequence aligned to the HMM.

    **Output:**

    The results are provided in comprehensive bar plots as well as in
    CSV and JSON formats. When a custom RNA is not provided, the results will
    include protein sequence logos for a range of top-scoring RNA sequences, as well
    as the aforementioned CSV and JSON files with the scores.

    **Funding:**
    This project has received funding from the European Union's Horizon 2020 research
    and innovation programme under the Marie Skłodowska-Curie grant
    agreement No. 813239. This work was supported by the European Regional
    Development Fund and Brussels-Capital Region-Innoviris within the framework of the
    Operational Programme 2014-2020 (ERDF-2020 project ICITY-RDI.BRU)
    ]]>
    </help>
    <citations>
        <citation type="doi">10.1371/journal.pcbi.1010859</citation>
        <citation type="doi">10.1093/nar/gkaf367</citation>
    </citations>
</tool>