Mercurial > repos > iuc > teloscope
diff teloscope.xml @ 0:be2c72b9798b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/teloscope commit 91ae0a450a15bf0a37c017b57831d40e2cea8dbc
| author | iuc |
|---|---|
| date | Thu, 27 Nov 2025 08:52:19 +0000 |
| parents | |
| children | 089c6e8122c8 |
line wrap: on
line diff
<tool id="teloscope" name="Teloscope" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Assembly telomere annotation</description>
    <!-- The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens and the "requirements" and
         "citations" macros are not defined in this file; presumably they come from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <version_command>teloscope -v</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## All result files are written below ./output and picked up through the
        ## from_work_dir patterns declared in the outputs section.
        ## The report (stdout) goes to a fixed, quoted file name instead of one built
        ## from the dataset name: that name is user-controlled and may contain spaces
        ## or shell metacharacters, which would break (or hijack) the redirection.
        ## The telo_report output still matches it via output/*.telo.report.
        mkdir -p output &&
        teloscope
            --input-sequence '$input_sequence'
            --output output
            --canonical '$canonical'
            --patterns '$patterns'
            --window '$window'
            --step '$step'
            --threads "\${GALAXY_SLOTS:-2}"
            --terminal-limit '$terminal_limit'
            --max-match-distance '$max_match_distance'
            --max-block-distance '$max_block_distance'
            --min-block-length '$min_block_length'
            --min-block-density '$min_block_density'

            $out_win_repeats
            $out_gc
            $out_entropy
            $out_matches
            $out_its
            $ultra_fast
            $verbose
            > 'output/teloscope.telo.report'
    ]]></command>
    <inputs>
        <param argument="--input-sequence" type="data" format="fasta,fasta.gz" label="Input assembly"/>

        <!-- Whitespace is stripped by the sanitizer; the validator then restricts the value to
             plain nucleotides, so it is safe inside the single-quoted command argument. -->
        <param argument="--canonical" type="text" value="TTAGGG" label="Canonical telomeric pattern">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value=" "/>
                    <remove value="\t"/>
                    <remove value="\r"/>
                    <remove value="\n"/>
                </valid>
            </sanitizer>
            <validator type="regex" message="Canonical must contain only A,C,G,T.">^[ACGTacgt]+$</validator>
        </param>

        <!-- One or more IUPAC-coded motifs separated by single commas (no empty entries). -->
        <param argument="--patterns" type="text" value="TTAGGG,CCCTAA"
               label="Patterns to explore (comma-separated), IUPAC allowed">
            <validator type="regex"
                       message="Use IUPAC letters ACGTRYSWKMBDHVN; comma-separated.">^[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+(?:,[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+)*$</validator>
        </param>

        <!-- Numeric tuning parameters, passed straight through to the matching command-line options. -->
        <param argument="--window" type="integer" min="100" value="1000" label="Sliding window size"/>
        <param argument="--step" type="integer" min="100" value="500" label="Sliding window step"/>
        <param argument="--terminal-limit" type="integer" min="10000" value="50000" label="Terminal limit (bp) in contigs"/>
        <param argument="--max-match-distance" type="integer" min="0" value="50" label="Maximum distance (bp) for merging matches"/>
        <param argument="--max-block-distance" type="integer" min="0" value="200" label="Maximum block distance for merging"/>
        <param argument="--min-block-length" type="integer" min="0" value="500" label="Minimum block length"/>
        <param argument="--min-block-density" type="float" min="0" max="1" value="0.5" label="Minimum block density (0–1)"/>

        <!-- Switches: each boolean expands to its flag when checked and to an empty string otherwise.
             Several of them also gate optional outputs through the filters further down. -->
        <param argument="--out-win-repeats" type="boolean" truevalue="--out-win-repeats" falsevalue="" checked="false" label="Window repeat counts"/>
        <param argument="--out-gc" type="boolean" truevalue="--out-gc" falsevalue="" checked="false" label="Window GC"/>
        <param argument="--out-entropy" type="boolean" truevalue="--out-entropy" falsevalue="" checked="false" label="Window Shannon entropy"/>
        <param argument="--out-matches" type="boolean" truevalue="--out-matches" falsevalue="" checked="false" label="Canonical/NonCanonical Matches"/>
        <param argument="--out-its" type="boolean" truevalue="--out-its" falsevalue="" checked="false" label="Interstitial telomeres (ITSs)"/>
        <param argument="--ultra-fast" type="boolean" truevalue="--ultra-fast" falsevalue="" checked="true" label="Ultra-fast mode (terminal regions only)"/>
        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="Verbose output"/>
    </inputs>

    <outputs>
        <!-- BASIC OUTFILES: always created -->
        <data name="terminal_telomeres" format="bed" from_work_dir="output/*_terminal_telomeres.bed" label="${tool.name} on ${on_string}: Terminal telomeres"/>
        <!-- Captured stdout of the teloscope call (see the redirection in the command) -->
        <data name="telo_report" format="tabular" from_work_dir="output/*.telo.report" label="${tool.name} on ${on_string}: Summary report"/>

        <!-- OPTIONAL OUTFILES: only created when the corresponding switch is enabled -->
        <data name="interstitial_telomeres" format="bed" from_work_dir="output/*_interstitial_telomeres.bed" label="${tool.name} on ${on_string}: Interstitial telomeres">
            <filter>out_its</filter>
        </data>
        <data name="canonical_matches" format="bed" from_work_dir="output/*_canonical_matches.bed" label="${tool.name} on ${on_string}: Canonical matches">
            <filter>out_matches</filter>
        </data>
        <data name="noncanonical_matches" format="bed" from_work_dir="output/*_noncanonical_matches.bed" label="${tool.name} on ${on_string}: Noncanonical matches">
            <filter>out_matches</filter>
        </data>
        <!-- A single bedGraph is collected as soon as any window metric is requested -->
        <data name="window_metrics" format="bedgraph" from_work_dir="output/*_window_metrics.bedgraph" label="${tool.name} on ${on_string}: Window metrics">
            <filter>out_gc or out_entropy or out_win_repeats</filter>
        </data>
    </outputs>

    <tests>
        <!-- 1) Default (-u) : terminal telomeres + report -->
        <test expect_num_outputs="2">
            <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/>
            <param name="canonical" value="TTAGGG"/>
            <param name="patterns" value="TTAGGG,CCCTAA"/>
            <output name="terminal_telomeres">
                <assert_contents>
                    <!-- at least one BED-like line (match whole line to allow extra cols) -->
                    <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
                </assert_contents>
            </output>
            <output name="telo_report">
                <assert_contents>
                    <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/>
                    <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/>
                    <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/>
                    <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/>
                    <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/>
                </assert_contents>
            </output>
        </test>

        <!-- 2) Genome-wide (-g -e -r -m -i ; disable -u) -->
        <test expect_num_outputs="6">
            <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/>
            <param name="canonical" value="TTAGGG"/>
            <param name="patterns" value="TTAGGG,CCCTAA"/>
            <param name="out_gc" value="true"/>
            <param name="out_entropy" value="true"/>
            <param name="out_win_repeats" value="true"/>
            <param name="out_matches" value="true"/>
            <param name="out_its" value="true"/>
            <param name="ultra_fast" value="false"/>

            <output name="terminal_telomeres">
                <assert_contents>
                    <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
                </assert_contents>
            </output>
            <output name="interstitial_telomeres">
                <assert_contents>
                    <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
                </assert_contents>
            </output>
            <output name="canonical_matches">
                <assert_contents>
                    <!-- ensure at least one CCCTAA match line -->
                    <has_line_matching expression="^\S+\t\d+\t\d+\tCCCTAA(?:\s|$)"/>
                </assert_contents>
            </output>
            <output name="noncanonical_matches">
                <assert_contents>
                    <!-- expect an empty file -->
                    <has_n_lines n="0"/>
                </assert_contents>
            </output>
            <output name="window_metrics">
                <assert_contents>
                    <has_text text="track type=bedGraph"/>
                    <!-- first data window line -->
                    <has_line_matching expression="^\S+\t0\t1000\t.*"/>
                </assert_contents>
            </output>
            <output name="telo_report">
                <assert_contents>
                    <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/>
                    <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/>
                    <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/>
                    <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/>
                    <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
        Description:
            Teloscope is a tool for telomere annotation in genome assemblies.
            It scans for user-specified telomeric repeat patterns across assembly paths, contigs and windows.
            Teloscope annotates terminal and interstitial telomeres, canonical/noncanonical matches and genome-wide metrics such as GC content, Shannon entropy, and repeat counts.
            It generates a detailed telomere summary report for paths, telomere statistics, and chromosome labels to assess telomere completeness.
            Teloscope can be used for both complete and fragmented assemblies, providing valuable information for genome manual curation and analysis.

        Usage:
            Default (ultra-fast) scans terminal regions and reports terminal telomeres + a summary report.
                * ${input_sequence.name}_terminal_telomeres.bed
                * ${input_sequence.name}.telo.report

            Enabling window/match options (-g -e -r -m -i) performs a genome-wide scan and produces:
                * ${input_sequence.name}_terminal_telomeres.bed
                * ${input_sequence.name}_interstitial_telomeres.bed
                * ${input_sequence.name}_canonical_matches.bed
                * ${input_sequence.name}_noncanonical_matches.bed
                * ${input_sequence.name}_window_metrics.bedgraph
                * ${input_sequence.name}.telo.report

        Key parameters:
            - -c / --canonical: Canonical repeat (default TTAGGG). This is the vertebrate telomeric motif found at chromosome ends that binds to shelterin complex to form a telomere.
            - -p / --patterns: Variant patterns (comma-separated). These are additional telomeric repeat motifs to search for, besides the canonical repeat, it includes other variants that can be part of telomeres.
            - -w / -s: window size / step (defaults 1000/500)
            - -u / --ultra-fast: terminal scan only (default true); disabled automatically when -g/-e/-r/-m/-i are used.
    ]]></help>

    <expand macro="citations"/>
</tool>
