Mercurial > repos > iuc > telogator
comparison telogator.xml @ 0:afcb889cbce3 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/telogator2 commit ff18f7a9e15883099ec1cd699533658a280dcf12
| author | iuc |
|---|---|
| date | Thu, 04 Dec 2025 17:09:38 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:afcb889cbce3 |
|---|---|
| 1 <tool id="telogator" name="Telogator" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
| 2 <description>Measure allele-specific telomere length from long reads</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="edam_ontology"/> | |
| 7 <expand macro="xrefs"/> | |
| 8 <expand macro="requirements"/> | |
| 9 <expand macro="version_command"/> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 #import re | |
| 12 | |
| 13 ## Create output directory | |
| 14 mkdir -p output_dir && | |
| 15 | |
| 16 ## Link inputs under sanitized names carrying the right extension, since telogator infers the input type from it. | |
| 17 ## The 'input_<idx>_' prefix keeps link names unique for equal identifiers and stops a name from starting with '-' | |
| 18 #set $input_files = [] | |
| 19 #for $idx, $input_file in enumerate($input_reads) | |
| 20 #set $identifier = str($input_file.element_identifier) | |
| 21 #set $safe_name = 'input_' + str($idx) + '_' + re.sub('[^\w\-\.]', '_', $identifier) | |
| 22 ## Add extension only if filename doesn't already have appropriate extension | |
| 23 #if $input_file.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz')) | |
| 24 #set $safe_name = $safe_name + '.fa.gz' | |
| 25 #elif $input_file.is_of_type('fasta') and not ($safe_name.endswith('.fa') or $safe_name.endswith('.fasta')) | |
| 26 #set $safe_name = $safe_name + '.fa' | |
| 27 #elif $input_file.is_of_type('fastqsanger.gz', 'fastq.gz') and not ($safe_name.endswith('.fq.gz') or $safe_name.endswith('.fastq.gz')) | |
| 28 #set $safe_name = $safe_name + '.fq.gz' | |
| 29 #elif $input_file.is_of_type('fastqsanger', 'fastq') and not ($safe_name.endswith('.fq') or $safe_name.endswith('.fastq')) | |
| 30 #set $safe_name = $safe_name + '.fq' | |
| 31 #elif $input_file.is_of_type('bam') and not $safe_name.endswith('.bam') | |
| 32 #set $safe_name = $safe_name + '.bam' | |
| 33 #elif $input_file.is_of_type('cram') and not $safe_name.endswith('.cram') | |
| 34 #set $safe_name = $safe_name + '.cram' | |
| 35 #end if | |
| 36 ln -sf '${input_file}' '${safe_name}' && | |
| 37 #silent $input_files.append($safe_name) | |
| 38 #end for | |
| 39 | |
| 40 ## Run telogator on the linked input files; -p takes the process count from GALAXY_SLOTS (falls back to 1) | |
| 41 telogator2 | |
| 42 -i #echo ' '.join($input_files) | |
| 43 -o output_dir | |
| 44 -r '${read_type}' | |
| 45 -p "\${GALAXY_SLOTS:-1}" | |
| 46 | |
| 47 ## Basic parameters (-d and --rng are only passed when a value was entered) | |
| 48 -l '${basic_params.min_read_length}' | |
| 49 -c '${basic_params.min_canonical_hits}' | |
| 50 -n '${basic_params.min_reads_cluster}' | |
| 51 -m '${basic_params.atl_method}' | |
| 52 #if str($basic_params.downsample) != '' | |
| 53 -d '${basic_params.downsample}' | |
| 54 #end if | |
| 55 #if str($basic_params.random_seed) != '' | |
| 56 --rng '${basic_params.random_seed}' | |
| 57 #end if | |
| 58 | |
| 59 ## Reference files (both optional; each flag is omitted when no dataset is selected) | |
| 60 #if $reference_opts.custom_reference | |
| 61 -t '${reference_opts.custom_reference}' | |
| 62 #end if | |
| 63 #if $reference_opts.kmer_file | |
| 64 -k '${reference_opts.kmer_file}' | |
| 65 #end if | |
| 66 | |
| 67 ## Aligner selection: pass the executable name for the chosen aligner; the winnowmap k15 file is optional | |
| 68 #if $aligner.aligner_choice == 'minimap2' | |
| 69 --minimap2 minimap2 | |
| 70 #elif $aligner.aligner_choice == 'winnowmap' | |
| 71 --winnowmap winnowmap | |
| 72 #if $aligner.winnowmap_k15 | |
| 73 --winnowmap-k15 '${aligner.winnowmap_k15}' | |
| 74 #end if | |
| 75 #elif $aligner.aligner_choice == 'pbmm2' | |
| 76 --pbmm2 pbmm2 | |
| 77 #end if | |
| 78 | |
| 79 ## Advanced filtering (fast_aln is a boolean that expands to --fast-aln or to nothing) | |
| 80 --filt-tel '${advanced.filtering.filt_tel}' | |
| 81 --filt-nontel '${advanced.filtering.filt_nontel}' | |
| 82 --filt-sub '${advanced.filtering.filt_sub}' | |
| 83 --collapse-hom '${advanced.filtering.collapse_hom}' | |
| 84 | |
| 85 ${advanced.filtering.fast_aln} | |
| 86 | |
| 87 ## Hierarchical clustering (TREECUT) thresholds | |
| 88 -t0 '${advanced.clustering.t0}' | |
| 89 -t1 '${advanced.clustering.t1}' | |
| 90 -t2 '${advanced.clustering.t2}' | |
| 91 -tc '${advanced.clustering.tc}' | |
| 92 -ts '${advanced.clustering.ts}' | |
| 93 -th '${advanced.clustering.th}' | |
| 94 | |
| 95 ## Plot customization (afa-* for the all final alleles plot, va-* for the violin plot) | |
| 96 -afa-x '${advanced.plotting.afa_x}' | |
| 97 -afa-t '${advanced.plotting.afa_t}' | |
| 98 -afa-a '${advanced.plotting.afa_a}' | |
| 99 -va-y '${advanced.plotting.va_y}' | |
| 100 -va-t '${advanced.plotting.va_t}' | |
| 101 -va-p '${advanced.plotting.va_p}' | |
| 102 | |
| 103 ## Move the result files from output_dir to the Galaxy output datasets | |
| 104 && mv output_dir/tlens_by_allele.tsv '${output_tsv}' | |
| 105 && mv output_dir/all_final_alleles.png '${output_alleles_plot}' | |
| 106 && mv output_dir/violin_atl.png '${output_violin_plot}' | |
| 107 ]]></command> | |
| 108 <inputs> | |
| 109 <param name="input_reads" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz,bam" multiple="true" label="Input reads" help="Long-read sequencing data in FASTA, FASTQ or BAM format. Multiple files can be selected."/> | |
| 110 | |
| 111 <param name="read_type" type="select" label="Read type" help="Sequencing platform type"> | |
| 112 <option value="ont">Oxford Nanopore (ONT)</option> | |
| 113 <option value="hifi" selected="true">PacBio HiFi</option> | |
| 114 </param> | |
| 115 | |
| 116 <section name="basic_params" title="Basic Parameters" expanded="true"> | |
| 117 <param name="min_read_length" argument="-l" type="integer" value="4000" min="0" label="Minimum read length" help="Minimum read length in base pairs"/> | |
| 118 <param name="min_canonical_hits" argument="-c" type="integer" value="8" min="0" label="Minimum canonical kmer hits" help="Minimum hits to tandem canonical kmer"/> | |
| 119 <param name="min_reads_cluster" argument="-n" type="integer" value="3" min="1" label="Minimum reads per cluster" help="Minimum number of reads required per cluster. Recommended: PacBio Revio HiFi (30x): 4, PacBio Sequel II (10x): 3, Nanopore R10 (30x): 4"/> | |
| 120 <param name="atl_method" argument="-m" type="select" label="ATL calculation method" help="Method for calculating allele-specific telomere length"> | |
| 121 <option value="p75" selected="true">75th percentile (p75)</option> | |
| 122 <option value="mean">Mean</option> | |
| 123 <option value="median">Median</option> | |
| 124 <option value="max">Maximum</option> | |
| 125 </param> | |
| 126 <param name="downsample" argument="-d" type="integer" optional="true" value="" label="Downsample telomere reads" help="Downsample to N telomere reads (optional)"/> | |
| 127 <param name="random_seed" argument="--rng" type="integer" optional="true" value="" label="Random seed" help="Random seed value for reproducibility (optional)"/> | |
| 128 </section> | |
| 129 | |
| 130 <section name="reference_opts" title="Reference Options" expanded="false"> | |
| 131 <param name="custom_reference" argument="-t" type="data" format="fasta" optional="true" label="Custom reference FASTA" help="Optional custom telogator reference FASTA file. If not provided, built-in human T2T reference will be used."/> | |
| 132 <param name="kmer_file" argument="-k" type="data" format="tsv" optional="true" label="Telomere kmers file" help="Optional telomere k-mers file. If omitted, a built-in human telomere k-mers file is used."/> | |
| 133 </section> | |
| 134 | |
| 135 <conditional name="aligner"> | |
| 136 <param name="aligner_choice" type="select" label="Alignment tool" help="Select which aligner to use"> | |
| 137 <option value="minimap2" selected="true">minimap2</option> | |
| 138 <option value="winnowmap">winnowmap</option> | |
| 139 <option value="pbmm2">pbmm2</option> | |
| 140 </param> | |
| 141 <when value="minimap2"/> | |
| 142 <when value="winnowmap"> | |
| 143 <param argument="--winnowmap-k15" type="data" format="txt" optional="true" label="Winnowmap k15 file" help="High-frequency kmers file for winnowmap"/> | |
| 144 </when> | |
| 145 <when value="pbmm2"/> | |
| 146 </conditional> | |
| 147 | |
| 148 <section name="advanced" title="Advanced Parameters" expanded="false"> | |
| 149 <section name="filtering" title="Filtering Thresholds" expanded="true"> | |
| 150 <param argument="--filt-tel" type="integer" value="400" min="0" label="Minimum terminating telomere" help="Minimum terminating telomere length in bp"/> | |
| 151 <param argument="--filt-nontel" type="integer" value="100" min="0" label="Maximum terminating non-telomere" help="Maximum terminating non-telomere length in bp"/> | |
| 152 <param argument="--filt-sub" type="integer" value="1000" min="0" label="Minimum terminating subtelomere" help="Minimum terminating subtelomere length in bp"/> | |
| 153 <param argument="--collapse-hom" type="integer" value="1000" min="0" label="Collapse homologous alleles" help="Merge alleles within this distance in bp"/> | |
| 154 <param argument="--fast-aln" type="boolean" truevalue="--fast-aln" falsevalue="" checked="false" label="Use fast alignment" help="Use faster but less accurate pairwise alignment"/> | |
| 155 </section> | |
| 156 | |
| 157 <section name="clustering" title="Hierarchical Clustering (TREECUT) Parameters" expanded="false"> | |
| 158 <param argument="-t0" type="float" value="0.200" min="0" max="1" label="TVR clustering iteration 0" help="Threshold for TVR clustering in iteration 0"/> | |
| 159 <param argument="-t1" type="float" value="0.150" min="0" max="1" label="TVR clustering iteration 1" help="Threshold for TVR clustering in iteration 1"/> | |
| 160 <param argument="-t2" type="float" value="0.100" min="0" max="1" label="TVR clustering iteration 2" help="Threshold for TVR clustering in iteration 2"/> | |
| 161 <param argument="-tc" type="float" value="0.050" min="0" max="1" label="TVR clustering collapse" help="Threshold for collapsing TVR clusters"/> | |
| 162 <param argument="-ts" type="float" value="0.200" min="0" max="1" label="Subtel cluster refinement" help="Threshold for subtelomere cluster refinement"/> | |
| 163 <param argument="-th" type="float" value="0.050" min="0" max="1" label="Collapsing aligned alleles" help="Threshold for collapsing aligned alleles"/> | |
| 164 </section> | |
| 165 | |
| 166 <section name="plotting" title="Plot Customization" expanded="false"> | |
| 167 <param argument="-afa-x" type="integer" value="15000" min="0" label="All alleles plot X-axis max" help="Maximum X-axis value for all final alleles plot"/> | |
| 168 <param argument="-afa-t" type="integer" value="1000" min="0" label="All alleles plot tick steps" help="Tick step size for all final alleles plot"/> | |
| 169 <param argument="-afa-a" type="integer" value="100" min="0" label="Minimum ATL for plot inclusion" help="Minimum allele-specific telomere length for inclusion in all final alleles plot"/> | |
| 170 <param argument="-va-y" type="integer" value="20000" min="0" label="Violin plot Y-axis max" help="Maximum Y-axis value for violin plot"/> | |
| 171 <param argument="-va-t" type="integer" value="5000" min="0" label="Violin plot tick steps" help="Tick step size for violin plot"/> | |
| 172 <param argument="-va-p" type="integer" value="2" min="1" label="Ploidy" help="Number of alleles per chromosome arm (ploidy)"/> | |
| 173 </section> | |
| 174 </section> | |
| 175 </inputs> | |
| 176 <outputs> | |
| 177 <data name="output_tsv" format="tabular" label="${tool.name} on ${on_string}: Telomere lengths by allele"/> | |
| 178 <data name="output_alleles_plot" format="png" label="${tool.name} on ${on_string}: All final alleles plot"/> | |
| 179 <data name="output_violin_plot" format="png" label="${tool.name} on ${on_string}: Violin plot"/> | |
| 180 </outputs> | |
| 181 <tests> | |
| 182 <!-- Test 1: PacBio HiFi data --> | |
| 183 <test expect_num_outputs="3"> | |
| 184 <param name="input_reads" value="hg002-telreads_pacbio.sub.fa.gz"/> | |
| 185 <param name="read_type" value="hifi"/> | |
| 186 <conditional name="aligner"> | |
| 187 <param name="aligner_choice" value="minimap2"/> | |
| 188 </conditional> | |
| 189 <output name="output_tsv"> | |
| 190 <assert_contents> | |
| 191 <has_text text="chr"/> | |
| 192 <has_text text="position"/> | |
| 193 <has_text text="allele_id"/> | |
| 194 <has_text text="TL_p75"/> | |
| 195 <has_n_columns n="11"/> | |
| 196 <has_n_lines n="13" delta="2"/> | |
| 197 <has_line_matching expression="chr\d+[pq]\t\d+.*"/> | |
| 198 </assert_contents> | |
| 199 </output> | |
| 200 <output name="output_alleles_plot"> | |
| 201 <assert_contents> | |
| 202 <has_size min="10000" max="500000"/> | |
| 203 </assert_contents> | |
| 204 </output> | |
| 205 <output name="output_violin_plot"> | |
| 206 <assert_contents> | |
| 207 <has_size min="10000" max="500000"/> | |
| 208 </assert_contents> | |
| 209 </output> | |
| 210 </test> | |
| 211 <!-- Test 2: Oxford Nanopore data, 2 inputs --> | |
| 212 <test expect_num_outputs="3"> | |
| 213 <param name="input_reads" value="hg002-ont-1p.fa.gz,hg002-ont-1p.sub.fa.gz"/> | |
| 214 <param name="read_type" value="ont"/> | |
| 215 <conditional name="aligner"> | |
| 216 <param name="aligner_choice" value="minimap2"/> | |
| 217 </conditional> | |
| 218 <output name="output_tsv"> | |
| 219 <assert_contents> | |
| 220 <has_text text="chr"/> | |
| 221 <has_text text="position"/> | |
| 222 <has_text text="allele_id"/> | |
| 223 <has_text text="TL_p75"/> | |
| 224 <has_n_columns n="11"/> | |
| 225 <has_n_lines n="2" delta="10"/> | |
| 226 </assert_contents> | |
| 227 </output> | |
| 228 <output name="output_alleles_plot"> | |
| 229 <assert_contents> | |
| 230 <has_size min="10000" max="500000"/> | |
| 231 </assert_contents> | |
| 232 </output> | |
| 233 <output name="output_violin_plot"> | |
| 234 <assert_contents> | |
| 235 <has_size min="10000" max="500000"/> | |
| 236 </assert_contents> | |
| 237 </output> | |
| 238 </test> | |
| 239 <!-- Test 3: PacBio HiFi data, pbmm2 --> | |
| 240 <test expect_num_outputs="3"> | |
| 241 <param name="input_reads" value="hg002-telreads_pacbio.sub.fa.gz"/> | |
| 242 <param name="read_type" value="hifi"/> | |
| 243 <conditional name="aligner"> | |
| 244 <param name="aligner_choice" value="pbmm2"/> | |
| 245 </conditional> | |
| 246 <output name="output_tsv"> | |
| 247 <assert_contents> | |
| 248 <has_text text="chr"/> | |
| 249 <has_text text="position"/> | |
| 250 <has_text text="allele_id"/> | |
| 251 <has_text text="TL_p75"/> | |
| 252 <has_n_columns n="11"/> | |
| 253 <has_n_lines n="13" delta="2"/> | |
| 254 </assert_contents> | |
| 255 </output> | |
| 256 <output name="output_alleles_plot"> | |
| 257 <assert_contents> | |
| 258 <has_size min="10000" max="500000"/> | |
| 259 </assert_contents> | |
| 260 </output> | |
| 261 <output name="output_violin_plot"> | |
| 262 <assert_contents> | |
| 263 <has_size min="10000" max="500000"/> | |
| 264 </assert_contents> | |
| 265 </output> | |
| 266 </test> | |
| 267 <!-- Test 4: PacBio HiFi data, winnowmap --> | |
| 268 <test expect_num_outputs="3"> | |
| 269 <param name="input_reads" value="hg002-telreads_pacbio.sub.fa.gz"/> | |
| 270 <param name="read_type" value="hifi"/> | |
| 271 <conditional name="aligner"> | |
| 272 <param name="aligner_choice" value="winnowmap"/> | |
| 273 </conditional> | |
| 274 <output name="output_tsv"> | |
| 275 <assert_contents> | |
| 276 <has_text text="chr"/> | |
| 277 <has_text text="position"/> | |
| 278 <has_text text="allele_id"/> | |
| 279 <has_text text="TL_p75"/> | |
| 280 <has_n_columns n="11"/> | |
| 281 <has_n_lines n="13" delta="2"/> | |
| 282 </assert_contents> | |
| 283 </output> | |
| 284 <output name="output_alleles_plot"> | |
| 285 <assert_contents> | |
| 286 <has_size min="10000" max="500000"/> | |
| 287 </assert_contents> | |
| 288 </output> | |
| 289 <output name="output_violin_plot"> | |
| 290 <assert_contents> | |
| 291 <has_size min="10000" max="500000"/> | |
| 292 </assert_contents> | |
| 293 </output> | |
| 294 </test> | |
| 295 </tests> | |
| 296 <help><![CDATA[ | |
| 297 **What it does** | |
| 298 | |
| 299 Telogator2 measures allele-specific telomere length (ATL) and characterizes telomere variant repeat (TVR) sequences from long-read sequencing data (PacBio HiFi or Oxford Nanopore). | |
| 300 | |
| 301 The tool performs the following analyses: | |
| 302 | |
| 303 1. Extracts reads containing telomeric sequences | |
| 304 2. Aligns reads to reference genome to identify chromosome arms | |
| 305 3. Clusters reads by TVR sequences to identify individual alleles | |
| 306 4. Calculates allele-specific telomere lengths | |
| 307 5. Generates visualizations of telomere length distributions | |
| 308 | |
| 309 **Inputs** | |
| 310 | |
| 311 - Long-read sequencing data (FASTA, FASTQ, or BAM format; gzip-compressed FASTA/FASTQ is also accepted) | |
| 312 - Optional custom reference genome and kmer files | |
| 313 - Platform-specific parameters (PacBio HiFi or Oxford Nanopore) | |
| 314 | |
| 315 **Outputs** | |
| 316 | |
| 317 1. **tlens_by_allele.tsv**: Primary results table containing: | |
| 318 | |
| 319 - chr: Chromosome arm (or chrU for unmapped) | |
| 320 - position: Anchor coordinate | |
| 321 - ref_samp: Reference contig alignment | |
| 322 - allele_id: Allele identifier (suffix 'i' indicates interstitial telomeric regions) | |
| 323 - TL_p75: Allele-specific telomere length (75th percentile by default) | |
| 324 - read_TLs, read_lengths, read_mapq: Per-read metrics | |
| 325 - tvr_len, tvr_consensus: Telomere variant repeat characteristics | |
| 326 - supporting_reads: Read identifiers | |
| 327 | |
| 328 2. **all_final_alleles.png**: Visualization of all identified alleles | |
| 329 | |
| 330 3. **violin_atl.png**: Violin plot showing ATL distributions by chromosome arm | |
| 331 | |
| 332 **Platform-Specific Recommendations** | |
| 333 | |
| 334 - **PacBio Revio HiFi (30x coverage)**: Set minimum reads per cluster to 4 | |
| 335 - **PacBio Sequel II (10x coverage)**: Set minimum reads per cluster to 3 | |
| 336 - **Nanopore R10 (30x coverage)**: Set minimum reads per cluster to 4 | |
| 337 - **Large enrichment datasets**: Increase minimum reads per cluster to 10 | |
| 338 | |
| 339 **Important Notes** | |
| 340 | |
| 341 - For PacBio Revio data, include both "hifi" and "fail" BAM files | |
| 342 - Older Nanopore data (Guppy basecalled) may have high error rates in telomere regions | |
| 343 - Runtime improves with additional CPU cores (the number of processes is set automatically from the cores Galaxy allocates to the job) | |
| 344 - Alleles with suffix 'i' are interstitial telomeric regions and may need to be excluded from downstream analysis | |
| 345 | |
| 346 ]]></help> | |
| 347 <expand macro="citations"/> | |
| 348 </tool> |
