comparison telogator.xml @ 0:afcb889cbce3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/telogator2 commit ff18f7a9e15883099ec1cd699533658a280dcf12
author iuc
date Thu, 04 Dec 2025 17:09:38 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:afcb889cbce3
1 <tool id="telogator" name="Telogator" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>Measure allele-specific telomere length from long reads</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam_ontology"/>
7 <expand macro="xrefs"/>
8 <expand macro="requirements"/>
9 <expand macro="version_command"/>
10 <command detect_errors="exit_code"><![CDATA[
11 #import re
12
13 ## Create the directory telogator2 writes into (passed as -o below)
14 mkdir -p output_dir &&
15
16 ## Link input files under shell-safe names with proper extensions, since the extension is used to define input types in telogator.
17 ## The loop index is prefixed so inputs sharing an identifier get distinct links and no link name can start with '-'.
18 #set $input_files = []
19 #for $idx, $input_file in enumerate($input_reads)
20 #set $identifier = str($input_file.element_identifier)
21 #set $safe_name = str($idx) + '_' + re.sub('[^\w\-\.]', '_', $identifier)
22 ## Add extension only if filename doesn't already have appropriate extension
23 #if $input_file.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz'))
24 #set $safe_name = $safe_name + '.fa.gz'
25 #elif $input_file.is_of_type('fasta') and not ($safe_name.endswith('.fa') or $safe_name.endswith('.fasta'))
26 #set $safe_name = $safe_name + '.fa'
27 #elif $input_file.is_of_type('fastqsanger.gz', 'fastq.gz') and not ($safe_name.endswith('.fq.gz') or $safe_name.endswith('.fastq.gz'))
28 #set $safe_name = $safe_name + '.fq.gz'
29 #elif $input_file.is_of_type('fastqsanger', 'fastq') and not ($safe_name.endswith('.fq') or $safe_name.endswith('.fastq'))
30 #set $safe_name = $safe_name + '.fq'
31 #elif $input_file.is_of_type('bam') and not $safe_name.endswith('.bam')
32 #set $safe_name = $safe_name + '.bam'
33 #elif $input_file.is_of_type('cram') and not $safe_name.endswith('.cram')
34 #set $safe_name = $safe_name + '.cram'
35 #end if
36 ln -sf '${input_file}' '${safe_name}' &&
37 #silent $input_files.append($safe_name)
38 #end for
39
40 ## Run telogator2 on all linked inputs; -p receives the Galaxy-allocated slot count (1 when unset)
41 telogator2
42 -i #echo ' '.join($input_files)
43 -o output_dir
44 -r '${read_type}'
45 -p "\${GALAXY_SLOTS:-1}"
46
47 ## Basic parameters
48 -l '${basic_params.min_read_length}'
49 -c '${basic_params.min_canonical_hits}'
50 -n '${basic_params.min_reads_cluster}'
51 -m '${basic_params.atl_method}'
52 #if str($basic_params.downsample) != ''
53 -d '${basic_params.downsample}'
54 #end if
55 #if str($basic_params.random_seed) != ''
56 --rng '${basic_params.random_seed}'
57 #end if
58
59 ## Optional reference files (each flag is omitted when no dataset is selected)
60 #if $reference_opts.custom_reference
61 -t '${reference_opts.custom_reference}'
62 #end if
63 #if $reference_opts.kmer_file
64 -k '${reference_opts.kmer_file}'
65 #end if
66
67 ## Aligner selection
68 #if $aligner.aligner_choice == 'minimap2'
69 --minimap2 minimap2
70 #elif $aligner.aligner_choice == 'winnowmap'
71 --winnowmap winnowmap
72 #if $aligner.winnowmap_k15
73 --winnowmap-k15 '${aligner.winnowmap_k15}'
74 #end if
75 #elif $aligner.aligner_choice == 'pbmm2'
76 --pbmm2 pbmm2
77 #end if
78
79 ## Advanced filtering
80 --filt-tel '${advanced.filtering.filt_tel}'
81 --filt-nontel '${advanced.filtering.filt_nontel}'
82 --filt-sub '${advanced.filtering.filt_sub}'
83 --collapse-hom '${advanced.filtering.collapse_hom}'
84 ## Boolean parameter: expands to --fast-aln when enabled and to an empty string otherwise
85 ${advanced.filtering.fast_aln}
86
87 ## Hierarchical clustering parameters
88 -t0 '${advanced.clustering.t0}'
89 -t1 '${advanced.clustering.t1}'
90 -t2 '${advanced.clustering.t2}'
91 -tc '${advanced.clustering.tc}'
92 -ts '${advanced.clustering.ts}'
93 -th '${advanced.clustering.th}'
94
95 ## Plot customization
96 -afa-x '${advanced.plotting.afa_x}'
97 -afa-t '${advanced.plotting.afa_t}'
98 -afa-a '${advanced.plotting.afa_a}'
99 -va-y '${advanced.plotting.va_y}'
100 -va-t '${advanced.plotting.va_t}'
101 -va-p '${advanced.plotting.va_p}'
102
103 ## Move outputs to expected locations
104 && mv output_dir/tlens_by_allele.tsv '${output_tsv}'
105 && mv output_dir/all_final_alleles.png '${output_alleles_plot}'
106 && mv output_dir/violin_atl.png '${output_violin_plot}'
107 ]]></command>
108 <inputs>
109 <param name="input_reads" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,bam" multiple="true" label="Input reads" help="Long-read sequencing data in FASTA, FASTQ or BAM format. Multiple files can be selected."/>
110 <!-- Sequencing platform; forwarded to telogator2 as -r -->
111 <param name="read_type" type="select" label="Read type" help="Sequencing platform type">
112 <option value="ont">Oxford Nanopore (ONT)</option>
113 <option value="hifi" selected="true">PacBio HiFi</option>
114 </param>
115 <!-- Core thresholds; downsample and random_seed are only passed when a value is entered -->
116 <section name="basic_params" title="Basic Parameters" expanded="true">
117 <param name="min_read_length" argument="-l" type="integer" value="4000" min="0" label="Minimum read length" help="Minimum read length in base pairs"/>
118 <param name="min_canonical_hits" argument="-c" type="integer" value="8" min="0" label="Minimum canonical kmer hits" help="Minimum hits to tandem canonical kmer"/>
119 <param name="min_reads_cluster" argument="-n" type="integer" value="3" min="1" label="Minimum reads per cluster" help="Minimum number of reads required per cluster. Recommended: PacBio Revio HiFi (30x): 4, PacBio Sequel II (10x): 3, Nanopore R10 (30x): 4"/>
120 <param name="atl_method" argument="-m" type="select" label="ATL calculation method" help="Method for calculating allele-specific telomere length">
121 <option value="p75" selected="true">75th percentile (p75)</option>
122 <option value="mean">Mean</option>
123 <option value="median">Median</option>
124 <option value="max">Maximum</option>
125 </param>
126 <param name="downsample" argument="-d" type="integer" optional="true" value="" min="1" label="Downsample telomere reads" help="Downsample to N telomere reads (optional; N must be at least 1)"/>
127 <param name="random_seed" argument="--rng" type="integer" optional="true" value="" label="Random seed" help="Random seed value for reproducibility (optional)"/>
128 </section>
129 <!-- Optional datasets replacing the built-in reference and telomere k-mer files (-t / -k) -->
130 <section name="reference_opts" title="Reference Options" expanded="false">
131 <param name="custom_reference" argument="-t" type="data" format="fasta" optional="true" label="Custom reference FASTA" help="Optional custom telogator reference FASTA file. If not provided, built-in human T2T reference will be used."/>
132 <param name="kmer_file" argument="-k" type="data" format="tsv" optional="true" label="Telomere kmers file" help="Optional telomere k-mers file. If omitted, a built-in human telomere k-mers file is used."/>
133 </section>
134 <!-- Aligner choice; only the winnowmap branch exposes an extra, optional k15 file -->
135 <conditional name="aligner">
136 <param name="aligner_choice" type="select" label="Alignment tool" help="Select which aligner to use">
137 <option value="minimap2" selected="true">minimap2</option>
138 <option value="winnowmap">winnowmap</option>
139 <option value="pbmm2">pbmm2</option>
140 </param>
141 <when value="minimap2"/>
142 <when value="winnowmap">
143 <param argument="--winnowmap-k15" type="data" format="txt" optional="true" label="Winnowmap k15 file" help="High-frequency kmers file for winnowmap"/>
144 </when>
145 <when value="pbmm2"/>
146 </conditional>
147 <!-- Advanced settings: every value below is always passed explicitly on the command line -->
148 <section name="advanced" title="Advanced Parameters" expanded="false">
149 <section name="filtering" title="Filtering Thresholds" expanded="true">
150 <param argument="--filt-tel" type="integer" value="400" min="0" label="Minimum terminating telomere" help="Minimum terminating telomere length in bp"/>
151 <param argument="--filt-nontel" type="integer" value="100" min="0" label="Maximum terminating non-telomere" help="Maximum terminating non-telomere length in bp"/>
152 <param argument="--filt-sub" type="integer" value="1000" min="0" label="Minimum terminating subtelomere" help="Minimum terminating subtelomere length in bp"/>
153 <param argument="--collapse-hom" type="integer" value="1000" min="0" label="Collapse homologous alleles" help="Merge alleles within this distance in bp"/>
154 <param argument="--fast-aln" type="boolean" truevalue="--fast-aln" falsevalue="" checked="false" label="Use fast alignment" help="Use faster but less accurate pairwise alignment"/>
155 </section>
156
157 <section name="clustering" title="Hierarchical Clustering (TREECUT) Parameters" expanded="false">
158 <param argument="-t0" type="float" value="0.200" min="0" max="1" label="TVR clustering iteration 0" help="Threshold for TVR clustering in iteration 0"/>
159 <param argument="-t1" type="float" value="0.150" min="0" max="1" label="TVR clustering iteration 1" help="Threshold for TVR clustering in iteration 1"/>
160 <param argument="-t2" type="float" value="0.100" min="0" max="1" label="TVR clustering iteration 2" help="Threshold for TVR clustering in iteration 2"/>
161 <param argument="-tc" type="float" value="0.050" min="0" max="1" label="TVR clustering collapse" help="Threshold for collapsing TVR clusters"/>
162 <param argument="-ts" type="float" value="0.200" min="0" max="1" label="Subtel cluster refinement" help="Threshold for subtelomere cluster refinement"/>
163 <param argument="-th" type="float" value="0.050" min="0" max="1" label="Collapsing aligned alleles" help="Threshold for collapsing aligned alleles"/>
164 </section>
165
166 <section name="plotting" title="Plot Customization" expanded="false">
167 <param argument="-afa-x" type="integer" value="15000" min="0" label="All alleles plot X-axis max" help="Maximum X-axis value for all final alleles plot"/>
168 <param argument="-afa-t" type="integer" value="1000" min="0" label="All alleles plot tick steps" help="Tick step size for all final alleles plot"/>
169 <param argument="-afa-a" type="integer" value="100" min="0" label="Minimum ATL for plot inclusion" help="Minimum allele-specific telomere length for inclusion in all final alleles plot"/>
170 <param argument="-va-y" type="integer" value="20000" min="0" label="Violin plot Y-axis max" help="Maximum Y-axis value for violin plot"/>
171 <param argument="-va-t" type="integer" value="5000" min="0" label="Violin plot tick steps" help="Tick step size for violin plot"/>
172 <param argument="-va-p" type="integer" value="2" min="1" label="Ploidy" help="Number of alleles per chromosome arm (ploidy)"/>
173 </section>
174 </section>
175 </inputs>
176 <outputs> <!-- each dataset is filled by an mv step at the end of the command -->
177 <data format="tabular" name="output_tsv" label="${tool.name} on ${on_string}: Telomere lengths by allele"/> <!-- output_dir/tlens_by_allele.tsv -->
178 <data format="png" name="output_alleles_plot" label="${tool.name} on ${on_string}: All final alleles plot"/> <!-- output_dir/all_final_alleles.png -->
179 <data format="png" name="output_violin_plot" label="${tool.name} on ${on_string}: Violin plot"/> <!-- output_dir/violin_atl.png -->
180 </outputs>
181 <tests>
182 <!-- Test 1: one PacBio HiFi input (hg002-telreads_pacbio.sub.fa.gz) aligned with minimap2 -->
183 <test expect_num_outputs="3">
184 <param name="read_type" value="hifi"/>
185 <param name="input_reads" value="hg002-telreads_pacbio.sub.fa.gz"/>
186 <conditional name="aligner">
187 <param name="aligner_choice" value="minimap2"/>
188 </conditional>
189 <output name="output_tsv">
190 <assert_contents>
191 <has_n_columns n="11"/>
192 <has_n_lines delta="2" n="13"/>
193 <has_text text="chr"/>
194 <has_text text="position"/>
195 <has_text text="allele_id"/>
196 <has_text text="TL_p75"/>
197 <has_line_matching expression="chr\d+[pq]\t\d+.*"/>
198 </assert_contents>
199 </output>
200 <output name="output_alleles_plot">
201 <assert_contents>
202 <has_size max="500000" min="10000"/>
203 </assert_contents>
204 </output>
205 <output name="output_violin_plot">
206 <assert_contents>
207 <has_size max="500000" min="10000"/>
208 </assert_contents>
209 </output>
210 </test>
211 <!-- Test 2: two Oxford Nanopore inputs supplied together, aligned with minimap2 -->
212 <test expect_num_outputs="3">
213 <param name="read_type" value="ont"/>
214 <param name="input_reads" value="hg002-ont-1p.fa.gz,hg002-ont-1p.sub.fa.gz"/>
215 <conditional name="aligner">
216 <param name="aligner_choice" value="minimap2"/>
217 </conditional>
218 <output name="output_tsv">
219 <assert_contents>
220 <has_n_columns n="11"/>
221 <has_n_lines delta="10" n="2"/>
222 <has_text text="chr"/>
223 <has_text text="position"/>
224 <has_text text="allele_id"/>
225 <has_text text="TL_p75"/>
226 </assert_contents>
227 </output>
228 <output name="output_alleles_plot">
229 <assert_contents>
230 <has_size max="500000" min="10000"/>
231 </assert_contents>
232 </output>
233 <output name="output_violin_plot">
234 <assert_contents>
235 <has_size max="500000" min="10000"/>
236 </assert_contents>
237 </output>
238 </test>
239 <!-- Test 3: same PacBio HiFi input as test 1, aligned with pbmm2 -->
240 <test expect_num_outputs="3">
241 <param name="read_type" value="hifi"/>
242 <param name="input_reads" value="hg002-telreads_pacbio.sub.fa.gz"/>
243 <conditional name="aligner">
244 <param name="aligner_choice" value="pbmm2"/>
245 </conditional>
246 <output name="output_tsv">
247 <assert_contents>
248 <has_n_columns n="11"/>
249 <has_n_lines delta="2" n="13"/>
250 <has_text text="chr"/>
251 <has_text text="position"/>
252 <has_text text="allele_id"/>
253 <has_text text="TL_p75"/>
254 </assert_contents>
255 </output>
256 <output name="output_alleles_plot">
257 <assert_contents>
258 <has_size max="500000" min="10000"/>
259 </assert_contents>
260 </output>
261 <output name="output_violin_plot">
262 <assert_contents>
263 <has_size max="500000" min="10000"/>
264 </assert_contents>
265 </output>
266 </test>
267 <!-- Test 4: same PacBio HiFi input as test 1, aligned with winnowmap (no k15 file given) -->
268 <test expect_num_outputs="3">
269 <param name="read_type" value="hifi"/>
270 <param name="input_reads" value="hg002-telreads_pacbio.sub.fa.gz"/>
271 <conditional name="aligner">
272 <param name="aligner_choice" value="winnowmap"/>
273 </conditional>
274 <output name="output_tsv">
275 <assert_contents>
276 <has_n_columns n="11"/>
277 <has_n_lines delta="2" n="13"/>
278 <has_text text="chr"/>
279 <has_text text="position"/>
280 <has_text text="allele_id"/>
281 <has_text text="TL_p75"/>
282 </assert_contents>
283 </output>
284 <output name="output_alleles_plot">
285 <assert_contents>
286 <has_size max="500000" min="10000"/>
287 </assert_contents>
288 </output>
289 <output name="output_violin_plot">
290 <assert_contents>
291 <has_size max="500000" min="10000"/>
292 </assert_contents>
293 </output>
294 </test>
295 </tests>
296 <help><![CDATA[
297 **What it does**
298
299 Telogator2 measures allele-specific telomere length (ATL) and characterizes telomere variant repeat (TVR) sequences from long-read sequencing data (PacBio HiFi or Oxford Nanopore).
300
301 The tool performs the following analyses:
302
303 1. Extracts reads containing telomeric sequences
304 2. Aligns reads to reference genome to identify chromosome arms
305 3. Clusters reads by TVR sequences to identify individual alleles
306 4. Calculates allele-specific telomere lengths
307 5. Generates visualizations of telomere length distributions
308
309 **Inputs**
310
311 - Long-read sequencing data (FASTA, FASTQ, or BAM format; gzip-compressed FASTA/FASTQ is also accepted)
312 - Optional custom reference genome and kmer files
313 - Platform-specific parameters (PacBio HiFi or Oxford Nanopore)
314
315 **Outputs**
316
317 1. **tlens_by_allele.tsv**: Primary results table containing:
318
319 - chr: Chromosome arm (or chrU for unmapped)
320 - position: Anchor coordinate
321 - ref_samp: Reference contig alignment
322 - allele_id: Allele identifier (suffix 'i' indicates interstitial telomeric regions)
323 - TL_p75: Allele-specific telomere length (75th percentile by default)
324 - read_TLs, read_lengths, read_mapq: Per-read metrics
325 - tvr_len, tvr_consensus: Telomere variant repeat characteristics
326 - supporting_reads: Read identifiers
327
328 2. **all_final_alleles.png**: Visualization of all identified alleles
329
330 3. **violin_atl.png**: Violin plot showing ATL distributions by chromosome arm
331
332 **Platform-Specific Recommendations**
333
334 - **PacBio Revio HiFi (30x coverage)**: Set minimum reads per cluster to 4
335 - **PacBio Sequel II (10x coverage)**: Set minimum reads per cluster to 3
336 - **Nanopore R10 (30x coverage)**: Set minimum reads per cluster to 4
337 - **Large enrichment datasets**: Increase minimum reads per cluster to 10
338
339 **Important Notes**
340
341 - For PacBio Revio data, include both "hifi" and "fail" BAM files
342 - Older Nanopore data (Guppy basecalled) may have high error rates in telomere regions
343 - Runtime improves with additional CPU cores; the number of processes is set automatically from the cores Galaxy allocates to the job
344 - Alleles with suffix 'i' are interstitial telomeric regions and may need to be excluded from downstream analysis
345
346 ]]></help>
347 <expand macro="citations"/>
348 </tool>