diff teloscope.xml @ 0:be2c72b9798b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/teloscope commit 91ae0a450a15bf0a37c017b57831d40e2cea8dbc
author iuc
date Thu, 27 Nov 2025 08:52:19 +0000
parents
children 089c6e8122c8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/teloscope.xml	Thu Nov 27 08:52:19 2025 +0000
@@ -0,0 +1,195 @@
+<tool id="teloscope" name="Teloscope" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Assembly telomere annotation</description>
+    <!-- Shared definitions are imported from macros.xml; the @TOOL_VERSION@, @VERSION_SUFFIX@
+         and @PROFILE@ tokens used on the tool element are presumably defined there (TODO confirm). -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Expands the "requirements" macro (expected to come from the imported macros.xml). -->
+    <expand macro="requirements"/>
+    <!-- Command used to report the teloscope version. -->
+    <version_command>teloscope -v</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## teloscope writes its annotation files into the --output directory;
+        ## stdout is redirected into the same directory as the summary report.
+        ## The report uses a fixed file name on purpose: the dataset name is
+        ## user-controlled (spaces, quotes, shell metacharacters) and must not be
+        ## interpolated unquoted into the command line. The fixed name still
+        ## matches the "output/*.telo.report" pattern used to collect the report.
+        mkdir -p output &&
+        teloscope
+            --input-sequence '$input_sequence'
+            --output output
+            --canonical '$canonical'
+            --patterns '$patterns'
+            --window '$window'
+            --step '$step'
+            --threads "\${GALAXY_SLOTS:-2}"
+            --terminal-limit '$terminal_limit'
+            --max-match-distance '$max_match_distance'
+            --max-block-distance '$max_block_distance'
+            --min-block-length '$min_block_length'
+            --min-block-density '$min_block_density'
+            $out_win_repeats
+            $out_gc
+            $out_entropy
+            $out_matches
+            $out_its
+            $ultra_fast
+            $verbose
+            > output/teloscope.telo.report
+    ]]></command>
+    <inputs>
+        <!-- Assembly to annotate -->
+        <param argument="--input-sequence" type="data" format="fasta,fasta.gz" label="Input assembly"/>
+
+        <!-- Telomeric motifs.
+             No custom sanitizer on the canonical motif: the regex validator already rejects
+             whitespace and every non-ACGT character. Do not reintroduce entries such as
+             <remove value="\t"/>: XML does not interpret backslash escapes and Galaxy removes
+             each character of the value literally, so the backslash and the lowercase letters
+             t, r and n would be dropped from the valid set and a lowercase motif like "ttaggg"
+             would be rewritten with the invalid-character replacement. -->
+        <param argument="--canonical" type="text" value="TTAGGG" label="Canonical telomeric pattern">
+            <validator type="regex" message="Canonical must contain only A,C,G,T.">^[ACGTacgt]+$</validator>
+        </param>
+
+        <param argument="--patterns" type="text" value="TTAGGG,CCCTAA"
+            label="Patterns to explore (comma-separated), IUPAC allowed">
+            <validator type="regex"
+                    message="Use IUPAC letters ACGTRYSWKMBDHVN; comma-separated.">^[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+(?:,[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+)*$</validator>
+        </param>
+
+        <!-- Numeric scan settings, passed through to teloscope unchanged -->
+        <param argument="--window" type="integer" min="100" value="1000" label="Sliding window size"/>
+        <param argument="--step" type="integer" min="100" value="500" label="Sliding window step"/>
+        <param argument="--terminal-limit" type="integer" min="10000" value="50000" label="Terminal limit (bp) in contigs"/>
+        <param argument="--max-match-distance" type="integer" min="0" value="50" label="Maximum distance (bp) for merging matches"/>
+        <param argument="--max-block-distance" type="integer" min="0" value="200" label="Maximum block distance for merging"/>
+        <param argument="--min-block-length" type="integer" min="0" value="500" label="Minimum block length"/>
+        <param argument="--min-block-density" type="float" min="0" max="1" value="0.5" label="Minimum block density (0–1)"/>
+
+        <!-- Boolean switches: each one emits its flag when checked and nothing otherwise -->
+        <param argument="--out-win-repeats" type="boolean" truevalue="--out-win-repeats" falsevalue="" checked="false" label="Window repeat counts"/>
+        <param argument="--out-gc" type="boolean" truevalue="--out-gc" falsevalue="" checked="false" label="Window GC"/>
+        <param argument="--out-entropy" type="boolean" truevalue="--out-entropy" falsevalue="" checked="false" label="Window Shannon entropy"/>
+        <param argument="--out-matches" type="boolean" truevalue="--out-matches" falsevalue="" checked="false" label="Canonical/NonCanonical Matches"/>
+        <param argument="--out-its" type="boolean" truevalue="--out-its" falsevalue="" checked="false" label="Interstitial telomeres (ITSs)"/>
+        <param argument="--ultra-fast" type="boolean" truevalue="--ultra-fast" falsevalue="" checked="true" label="Ultra-fast mode (terminal regions only)"/>
+        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="Verbose output"/>
+    </inputs>
+
+    <outputs>
+        <!-- Unconditional outputs: neither carries a filter -->
+        <data name="terminal_telomeres"
+              format="bed"
+              from_work_dir="output/*_terminal_telomeres.bed"
+              label="${tool.name} on ${on_string}: Terminal telomeres"/>
+        <data name="telo_report"
+              format="tabular"
+              from_work_dir="output/*.telo.report"
+              label="${tool.name} on ${on_string}: Summary report"/>
+
+        <!-- Conditional outputs: created only when the boolean option(s) named in the filter are enabled -->
+        <data name="interstitial_telomeres"
+              format="bed"
+              from_work_dir="output/*_interstitial_telomeres.bed"
+              label="${tool.name} on ${on_string}: Interstitial telomeres">
+            <filter>out_its</filter>
+        </data>
+        <!-- Both match tracks are gated by the same option -->
+        <data name="canonical_matches"
+              format="bed"
+              from_work_dir="output/*_canonical_matches.bed"
+              label="${tool.name} on ${on_string}: Canonical matches">
+            <filter>out_matches</filter>
+        </data>
+        <data name="noncanonical_matches"
+              format="bed"
+              from_work_dir="output/*_noncanonical_matches.bed"
+              label="${tool.name} on ${on_string}: Noncanonical matches">
+            <filter>out_matches</filter>
+        </data>
+        <!-- A single bedGraph is collected when any of the three window options is enabled -->
+        <data name="window_metrics"
+              format="bedgraph"
+              from_work_dir="output/*_window_metrics.bedgraph"
+              label="${tool.name} on ${on_string}: Window metrics">
+            <filter>out_gc or out_entropy or out_win_repeats</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Test 1: defaults only (ultra-fast, -u, stays checked); expects the report and the terminal telomere BED -->
+        <test expect_num_outputs="2">
+            <param name="input_sequence" ftype="fasta.gz" value="bTaeGut7_chr33_mat.fa.gz"/>
+            <param name="patterns" value="TTAGGG,CCCTAA"/>
+            <param name="canonical" value="TTAGGG"/>
+            <output name="telo_report">
+                <assert_contents>
+                    <!-- every section banner of the report must be present -->
+                    <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/>
+                </assert_contents>
+            </output>
+            <output name="terminal_telomeres">
+                <assert_contents>
+                    <!-- one or more lines shaped like BED: name, start, end, then optional extra columns -->
+                    <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Test 2: genome-wide run (-g -e -r -m -i enabled, -u disabled); expects all six outputs -->
+        <test expect_num_outputs="6">
+            <param name="input_sequence" ftype="fasta.gz" value="bTaeGut7_chr33_mat.fa.gz"/>
+            <param name="patterns" value="TTAGGG,CCCTAA"/>
+            <param name="canonical" value="TTAGGG"/>
+            <param name="ultra_fast" value="false"/>
+            <param name="out_win_repeats" value="true"/>
+            <param name="out_gc" value="true"/>
+            <param name="out_entropy" value="true"/>
+            <param name="out_matches" value="true"/>
+            <param name="out_its" value="true"/>
+
+            <output name="telo_report">
+                <assert_contents>
+                    <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/>
+                    <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/>
+                </assert_contents>
+            </output>
+            <output name="terminal_telomeres">
+                <assert_contents>
+                    <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
+                </assert_contents>
+            </output>
+            <output name="interstitial_telomeres">
+                <assert_contents>
+                    <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
+                </assert_contents>
+            </output>
+            <output name="canonical_matches">
+                <assert_contents>
+                    <!-- a CCCTAA hit must show up in the fourth column -->
+                    <has_line_matching expression="^\S+\t\d+\t\d+\tCCCTAA(?:\s|$)"/>
+                </assert_contents>
+            </output>
+            <output name="noncanonical_matches">
+                <assert_contents>
+                    <!-- this file is expected to be empty for the test assembly -->
+                    <has_n_lines n="0"/>
+                </assert_contents>
+            </output>
+            <output name="window_metrics">
+                <assert_contents>
+                    <has_text text="track type=bedGraph"/>
+                    <!-- the first window covers 0 to 1000 (default window size) -->
+                    <has_line_matching expression="^\S+\t0\t1000\t.*"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    
+    <!-- Help is reStructuredText: keep a blank line before every list, and avoid literal
+         ${...} sequences, which are template-expression syntax rather than plain text. -->
+    <help><![CDATA[
+**Description**
+
+Teloscope is a tool for telomere annotation in genome assemblies.
+It scans for user-specified telomeric repeat patterns across assembly paths, contigs and windows.
+Teloscope annotates terminal and interstitial telomeres, canonical/noncanonical matches and genome-wide metrics such as GC content, Shannon entropy, and repeat counts.
+It generates a detailed telomere summary report for paths, telomere statistics, and chromosome labels to assess telomere completeness.
+Teloscope can be used for both complete and fragmented assemblies, providing valuable information for genome manual curation and analysis.
+
+**Usage**
+
+In the file names below, ``<input>`` stands for the name of the input assembly.
+
+Default (ultra-fast) scans terminal regions and reports terminal telomeres + a summary report:
+
+* ``<input>_terminal_telomeres.bed``
+* ``<input>.telo.report``
+
+Enabling window/match options (``-g -e -r -m -i``) performs a genome-wide scan and produces:
+
+* ``<input>_terminal_telomeres.bed``
+* ``<input>_interstitial_telomeres.bed``
+* ``<input>_canonical_matches.bed``
+* ``<input>_noncanonical_matches.bed``
+* ``<input>_window_metrics.bedgraph``
+* ``<input>.telo.report``
+
+**Key parameters**
+
+- ``-c`` / ``--canonical``: Canonical repeat (default TTAGGG). This is the vertebrate telomeric motif found at chromosome ends that binds to the shelterin complex to form a telomere.
+- ``-p`` / ``--patterns``: Variant patterns (comma-separated). These are additional telomeric repeat motifs to search for besides the canonical repeat, including other variants that can be part of telomeres.
+- ``-w`` / ``-s``: window size / step (defaults 1000/500).
+- ``-u`` / ``--ultra-fast``: terminal scan only (default true); disabled automatically when ``-g``/``-e``/``-r``/``-m``/``-i`` are used.
+    ]]></help>
+
+    <expand macro="citations"/>
+</tool>