view icescreen.xml @ 5:6262a9977181 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/icescreen commit c45c31257dbfd6991c86cee56bb8961e4c5cbe47
author iuc
date Sun, 23 Feb 2025 16:52:24 +0000
parents 49bf81065a47
children
line wrap: on
line source

<tool id="icescreen" name="ICEscreen" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>annotates ICEs and IMEs in Bacillota genomes</description>
    
    <!--
        Pulls in the shared macro definitions from macros.xml. The "xrefs" and
        "requirements" macros expanded below are expected to be defined there, and
        presumably so are the @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens used in the
        tool version attribute (confirm against macros.xml).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    
    <!-- Shell command used to report the version of the ICEscreen executable. -->
    <version_command>icescreen --version</version_command>
    <!--
        Runs ICEscreen on the input GenBank dataset and copies the three main result
        files to the Galaxy outputs (summary, detected_sp, detected_me).

        The input is symlinked under the fixed base name "galaxy_genbank"; the
        from_work_dir paths of the optional outputs rely on that same name.

        All ICEscreen stdout/stderr is written to ./tmp_icescreen/ICEscreen_log.txt,
        which is exposed as the optional "log" output. Because of that redirection
        nothing would reach the job's stderr on failure, so when icescreen exits with
        a non-zero status the log is replayed on stderr and the job exits with 1.
        The success path is unchanged.
    -->
    <command detect_errors="aggressive"><![CDATA[
        #set $gbname = 'galaxy_genbank'
        mkdir -p ./tmp_icescreen/source_genbank
        && mkdir -p ./tmp_icescreen/icescreen_result
        && ln -s '${genome}' ./tmp_icescreen/source_genbank/${gbname}.gb
        && (
            icescreen
                --galaxy
                --gbdir ./tmp_icescreen/source_genbank
                --outdir ./tmp_icescreen/icescreen_result
                --phylum '${taxonomy.phylum}'
                --jobs "\${GALAXY_SLOTS:-4}" > ./tmp_icescreen/ICEscreen_log.txt 2>&1
            || (cat ./tmp_icescreen/ICEscreen_log.txt >&2; exit 1)
        )

        && cat ./tmp_icescreen/icescreen_result/ICEscreen_results/${gbname}/detected_mobile_elements/${gbname}_detected_ME.summary > '${summary}'
        && cat ./tmp_icescreen/icescreen_result/ICEscreen_results/${gbname}/detected_mobile_elements/${gbname}_detected_SP_withMEIds.tsv > '${detected_sp}'
        && cat ./tmp_icescreen/icescreen_result/ICEscreen_results/${gbname}/detected_mobile_elements/${gbname}_detected_ME.tsv > '${detected_me}'

        ]]>
    </command>
    <inputs>
        <!-- Genome(s) to annotate; a single GenBank dataset, possibly multi-record. -->
        <param name="genome" type="data" format="genbank" label="Input genomes to analyze in Genbank format" help="Multi-genbank files (i.e. gbff) are supported. Each record must include the nucleotide sequence."/>
        <!-- Phylum handed to the phylum option of icescreen; only one choice is offered here. -->
        <section name="taxonomy" title="Phylum of the genomes to analyse" expanded="true">
            <param name="phylum" type="select" label="Bacillota is the default parameter">
                <option value="bacillota" selected="true">Bacillota</option>
            </param>
        </section>
        <!-- Each checked value enables the output whose filter tests for it in the outputs section. -->
        <section name="additional_output" title="Additional output files" expanded="false">
            <param name="optional_files" type="select" label="Make the following optional files available in the Galaxy history" multiple="true" optional="true" display="checkboxes">
                <option value="output_annotation_genbank" selected="false">Annotation in genbank format (full)</option>
                <option value="output_annotation_gff" selected="false">Annotation in gff format (ICEs and IMEs only)</option>
                <option value="output_annotation_embl" selected="false">Annotation in embl format (ICEs and IMEs only)</option>
                <option value="output_tar_gz_intermediate_files" selected="false">Archive of intermediate files</option>
                <option value="output_log" selected="false">Log file</option>
                <option value="output_param_conf" selected="false">Param conf file</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Main results: always produced; the command section writes them directly. -->
        <data name="detected_sp" format="tabular" label="${tool.name} on ${on_string}: Signature Proteins table"/>
        <data name="detected_me" format="tabular" label="${tool.name} on ${on_string}: ICEs/IMEs table"/>
        <data name="summary" format="txt" label="${tool.name} on ${on_string}: results summary"/>

        <!-- Optional results: collected from the working directory only when the matching
             value is checked in additional_output / optional_files. -->
        <data name="gbout"
              format="genbank.gz"
              from_work_dir="./tmp_icescreen/icescreen_result/ICEscreen_results/galaxy_genbank/detected_mobile_elements/standard_genome_annotation_formats/galaxy_genbank_icescreen.gb.gz"
              label="${tool.name} on ${on_string}: genbank annotation (full)">
            <filter>additional_output['optional_files'] and "output_annotation_genbank" in additional_output['optional_files']</filter>
        </data>
        <data name="gffout"
              format="gff3.gz"
              from_work_dir="./tmp_icescreen/icescreen_result/ICEscreen_results/galaxy_genbank/detected_mobile_elements/standard_genome_annotation_formats/galaxy_genbank_icescreen.gff.gz"
              label="${tool.name} on ${on_string}: GFF3 annotation (ICEs and IMEs only)">
            <filter>additional_output['optional_files'] and "output_annotation_gff" in additional_output['optional_files']</filter>
        </data>
        <data name="emblout"
              format="embl.gz"
              from_work_dir="./tmp_icescreen/icescreen_result/ICEscreen_results/galaxy_genbank/detected_mobile_elements/standard_genome_annotation_formats/galaxy_genbank_icescreen.embl.gz"
              label="${tool.name} on ${on_string}: EMBL annotation (ICEs and IMEs only)">
            <filter>additional_output['optional_files'] and "output_annotation_embl" in additional_output['optional_files']</filter>
        </data>
        <data name="intermediate_tar_gz"
              format="tar.gz"
              from_work_dir="./tmp_icescreen/icescreen_result/ICEscreen_results/galaxy_genbank/tmp_intermediate_files.tar.gz"
              label="${tool.name} on ${on_string}: intermediate files archive">
            <filter>additional_output['optional_files'] and "output_tar_gz_intermediate_files" in additional_output['optional_files']</filter>
        </data>
        <data name="log"
              format="txt"
              from_work_dir="./tmp_icescreen/ICEscreen_log.txt"
              label="${tool.name} on ${on_string}: log file">
            <filter>additional_output['optional_files'] and "output_log" in additional_output['optional_files']</filter>
        </data>
        <data name="param_conf"
              format="gz"
              from_work_dir="./tmp_icescreen/icescreen_result/ICEscreen_results/galaxy_genbank/param.conf.gz"
              label="${tool.name} on ${on_string}: param conf file">
            <filter>additional_output['optional_files'] and "output_param_conf" in additional_output['optional_files']</filter>
        </data>
    </outputs>
    <tests>
        <!-- 1. Default run: only the three main outputs. -->
        <test expect_num_outputs="3">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <output name="summary" file="NC_004668_137848_164286_detected_ME.summary" ftype="txt"/>
            <output name="detected_me" file="NC_004668_137848_164286_detected_ME.tsv" ftype="tabular"/>
            <output name="detected_sp">
                <assert_contents>
                    <has_text text="ICE_IME_id"/>
                    <has_text text="WP_002359295"/>
                    <has_text text="VirB4"/>
                    <has_n_columns n="54"/>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
        </test>

        <!-- 2. Optional GenBank annotation. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="optional_files" value="output_annotation_genbank"/>
            <output name="gbout" ftype="genbank.gz">
                <assert_contents>
                    <has_size value="19794" delta="100"/>
                </assert_contents>
            </output>
        </test>

        <!-- 3. Optional GFF3 annotation. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="optional_files" value="output_annotation_gff"/>
            <output name="gffout" ftype="gff3.gz">
                <assert_contents>
                    <has_size value="794" delta="100"/>
                </assert_contents>
            </output>
        </test>

        <!-- 4. Optional EMBL annotation. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="optional_files" value="output_annotation_embl"/>
            <output name="emblout" ftype="embl.gz">
                <assert_contents>
                    <has_size value="1002" delta="100"/>
                </assert_contents>
            </output>
        </test>

        <!-- 5. Optional archive of intermediate files. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="optional_files" value="output_tar_gz_intermediate_files"/>
            <output name="intermediate_tar_gz">
                <assert_contents>
                    <has_archive_member path=".*/*_detected_SP.tsv">
                        <has_text text="Is_hit_blast"/>
                    </has_archive_member>
                </assert_contents>
            </output>
        </test>

        <!-- 6. Optional log file, phylum left at its default. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="optional_files" value="output_log"/>
            <output name="log">
                <assert_contents>
                    <has_text text="bacillota"/>
                    <has_text text="Building DAG of jobs"/>
                    <has_text text="Complete log"/>
                    <not_has_text text="Error"/>
                </assert_contents>
            </output>
        </test>

        <!-- 7. Optional log file, phylum set explicitly. -->
        <test expect_num_outputs="4">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="phylum" value="bacillota"/>
            <param name="optional_files" value="output_log"/>
            <output name="log">
                <assert_contents>
                    <has_text text="bacillota"/>
                    <not_has_text text="Error"/>
                </assert_contents>
            </output>
        </test>

        <!-- 8. Three optional files requested together: 3 main + 3 optional outputs. -->
        <test expect_num_outputs="6">
            <param name="genome" value="genbank/NC_004668_137848_164286.gb" ftype="genbank"/>
            <param name="phylum" value="bacillota"/>
            <param name="optional_files" value="output_tar_gz_intermediate_files,output_log,output_param_conf"/>
            <output name="log">
                <assert_contents>
                    <has_text text="bacillota"/>
                    <not_has_text text="Error"/>
                </assert_contents>
            </output>
        </test>

        <!-- 9. Default run on a second input file (presumably multi-record, judging by its name). -->
        <test expect_num_outputs="3">
            <param name="genome" value="genbank/NC_013798_298468_322494_NC_020450_643089_661957.gb" ftype="genbank"/>
            <output name="summary" file="NC_013798_298468_322494_NC_020450_643089_661957_detected_ME.summary" ftype="txt"/>
            <output name="detected_me" file="NC_013798_298468_322494_NC_020450_643089_661957_detected_ME.tsv" ftype="tabular"/>
            <output name="detected_sp">
                <assert_contents>
                    <has_text text="ICE_IME_id"/>
                    <has_text text="WP_044555479.1"/>
                    <has_text text="WP_015426013.1"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
        .. class:: warningmark

        ICEscreen requires input files in genbank format. Multigenbank files (i.e. gbff files featuring multiple genome records back to back) are supported. Each Genbank record must include the nucleotide sequence.

-----

        **What it does**

        ICEscreen is a bioinformatic pipeline for the detection and annotation of ICEs (Integrative and Conjugative Elements) and IMEs (Integrative and Mobilizable Elements) in Bacillota genomes. The full documentation is available at https://icescreen.migale.inrae.fr. The forge of the project is accessible at https://forgemia.inra.fr/ices_imes_analysis/icescreen.

        **Main features of ICEscreen**

        - Detection of signature proteins (SPs) of ICEs/IMEs by using blastP on a curated resource. BlastP allows for an accurate assignment of hits to a given ICE/IME superfamily or family. The curated resource was derived from an analysis of over 120 ICEs and IMEs in Streptococcus genomes by the DINAMIC lab.
        - Detection of distant homologs of SPs by using HMM profiles of ICEs/IMEs protein families. The HMM profiles have been either imported from trusted resources or created and curated when needed.
        - Detection of the ICE/IME structures: ICEscreen groups together SPs that belong to the same ICE/IME structures to the best of its ability.
        - Delimitation of the elements at the gene or nucleotide level is not yet implemented and still needs manual curation.

        **Output files**

        There are 3 main output results files generated by ICEscreen:

        - Detected Signature Proteins table (`*_detected_SP_withMEIds.tsv`): list of the signature proteins detected by the tool and their possible assignment to an ICE or IME element. Each line represents a signature protein detected by ICEscreen.
        - Detected ICEs/IMEs table (`*_detected_ME.tsv`): list of the ICEs and IMEs elements detected by the tool, including information about the signature proteins they contain. Information in this file is complementary to the `*_detected_SP_withMEIds.tsv` file above, with each line representing an ICE / IME structure instead of a signature protein.
        - Results summary (`*_detected_ME.summary`): this file summarizes the main parameters and statistics regarding the ICE / IME structures and the SPs.

        Other optional and additional output files generated by ICEscreen:

        - Genbank annotation file (full): annotations of the original Genbank file as well as annotations of ICEscreen on ICEs, IMEs, and the detected signature proteins.
        - EMBL and GFF3 annotation files (ICEs and IMEs only): annotations of ICEscreen on ICEs, IMEs, and the detected signature proteins.
        - Intermediate files (tar.gz): You can download this file locally and uncompress it to have the intermediate files generated by the tool. See https://icescreen.migale.inrae.fr for details.
        - Log file (`*_detected_ME.log`): this file contains the log of each step of the ICEscreen pipeline.
        - Param conf file (`param.conf.gz`): this file contains the configuration parameters related to the ICEscreen run.

    ]]>

    </help>

    <expand macro="citation"/>
    
</tool>