comparison telogator_make_ref.xml @ 0:afcb889cbce3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/telogator2 commit ff18f7a9e15883099ec1cd699533658a280dcf12
author iuc
date Thu, 04 Dec 2025 17:09:38 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:afcb889cbce3
1 <tool id="telogator_make_ref" name="Telogator Make Reference" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>Create custom telogator reference from a T2T assembly</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="edam_ontology"/>
7 <expand macro="xrefs"/>
8 <expand macro="requirements"/>
9 <expand macro="version_command"/>
10 <command detect_errors="exit_code"><![CDATA[
11 #import re
12 #set $identifier = str($input_fasta.element_identifier)
13 #set $safe_name = re.sub('[^\w\-\.]', '_', $identifier)
14 #if $input_fasta.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz'))
15 #set $safe_name = $safe_name + '.fa.gz'
16 #elif $input_fasta.is_of_type('fasta') and not ($safe_name.endswith('.fa') or $safe_name.endswith('.fasta'))
17 #set $safe_name = $safe_name + '.fa'
18 #end if
19 mkdir -p output_dir &&
20 ln -sf '${input_fasta}' './${safe_name}' && ## the ./ prefix keeps a link name that starts with a dash from being parsed as an option
21 make_telogator_ref
22 -i './${safe_name}'
23 -o output_dir/output_ref.fa
24 -s '${sample_name}'
25 -c '${contig_list}'
26 ## Custom telomere k-mer file; -k is only passed when a dataset was selected
27 #if $kmer_file
28 -k '${kmer_file}'
29 #end if
30 ## Minimum telomere length
31 -m '${min_tel_length}'
32 ## Boolean flags: each expands to its option string when checked, otherwise to nothing
33 ${add_tel}
34 ${plot}
35 ## Hand the reference FASTA to Galaxy; PNG plots stay in output_dir for dataset discovery
36 && mv output_dir/output_ref.fa '${output_fasta}'
37 ]]></command>
38 <inputs>
39 <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Input T2T reference FASTA" help="Telomere-to-telomere reference genome assembly in FASTA format (gzipped supported)"/>
40 <param name="sample_name" argument="-s" type="text" value="sample" label="Sample name" help="Sample name to prepend to contig identifiers in the output">
41 <validator type="regex" message="Sample name must contain only alphanumeric characters and hyphens">^[a-zA-Z0-9-]+$</validator>
42 </param>
43 <param name="contig_list" argument="-c" type="text" value="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY" label="List of contigs" help="Comma-delimited list of contigs to include. Default is all human chromosomes.">
44 <validator type="empty_field"/>
45 <sanitizer> <!-- The value is wrapped in single quotes on the command line, so a single quote must never pass through; the double quote stays excluded as before -->
46 <valid initial="string.printable">
47 <remove value="&quot;"/><remove value="&apos;"/>
48 </valid>
49 </sanitizer>
50 </param>
51 <param name="kmer_file" argument="-k" type="data" format="tsv" optional="true" value="" label="Telomere kmers file" help="Optional telomere k-mers file. If omitted, a built-in human telomere k-mers file is used."/>
52 <param name="min_tel_length" argument="-m" type="integer" value="0" min="0" label="Minimum telomere length" help="Minimum telomere length required at contig ends (in base pairs)"/>
53 <param name="add_tel" type="boolean" truevalue="--add-tel" falsevalue="" checked="false" label="Include masked telomeres" help="Include masked telomeres as separate contigs in the output"/>
54 <param name="plot" type="boolean" truevalue="--plot" falsevalue="" checked="false" label="Generate telomere signal plots" help="Generate PNG plots showing telomere signals for each chromosome arm"/>
55 </inputs>
56 <outputs>
57 <data format="fasta" name="output_fasta" label="${tool.name} on ${on_string}: Reference FASTA"/> <!-- reference FASTA moved here by the command -->
58 <collection type="list" name="plots" label="${tool.name} on ${on_string}: Telomere signal plots"> <!-- only created when the plot option is enabled -->
59 <filter>plot</filter>
60 <discover_datasets directory="output_dir" format="png" pattern="(?P&lt;designation&gt;.+)\.png$"/> <!-- one element per PNG, named after the file stem -->
61 </collection>
62 </outputs>
63 <tests>
64 <!-- Test 1: Basic usage with default options (no plots requested, no separate telomere contigs expected) -->
65 <test expect_num_outputs="1">
66 <param name="input_fasta" value="t2t_subset_with_telomeres.fa.gz"/>
67 <param name="sample_name" value="test-sample1"/>
68 <param name="contig_list" value="t2t-i002c-mat_chr11p,t2t-i002c-mat_chr11q,t2t-i002c-mat_chr12p,t2t-i002c-mat_chr12q,t2t-i002c-mat_chr13p,t2t-i002c-mat_chr13q"/>
69 <output name="output_fasta">
70 <assert_contents>
71 <has_text text=">test-sample1"/>
72 <has_line_matching expression="^&gt;.*"/>
73 <has_line_matching expression="^[ACGTN]+$"/>
74 <has_size value="6100428" delta="100000"/>
75 <not_has_text text=">test-sample1_tel-"/>
76 </assert_contents>
77 </output>
78 </test>
79 <!-- Test 2: Plot option enabled; expects the FASTA plus a list collection holding one PNG per requested contig -->
80 <test expect_num_outputs="2">
81 <param name="input_fasta" value="t2t_subset_with_telomeres.fa.gz"/>
82 <param name="sample_name" value="test-sample2"/>
83 <param name="contig_list" value="t2t-i002c-mat_chr11p,t2t-i002c-mat_chr11q,t2t-i002c-mat_chr12p,t2t-i002c-mat_chr12q,t2t-i002c-mat_chr13p,t2t-i002c-mat_chr13q"/>
84 <param name="plot" value="true"/>
85 <output name="output_fasta">
86 <assert_contents>
87 <has_text text=">test-sample2"/>
88 </assert_contents>
89 </output>
90 <output_collection type="list" name="plots">
91 <element name="test-sample2_telsignal_t2t-i002c-mat_chr11pp">
92 <assert_contents>
93 <has_size min="10000"/>
94 </assert_contents>
95 </element>
96 <element name="test-sample2_telsignal_t2t-i002c-mat_chr11qq">
97 <assert_contents>
98 <has_size min="10000"/>
99 </assert_contents>
100 </element>
101 <element name="test-sample2_telsignal_t2t-i002c-mat_chr12pp">
102 <assert_contents>
103 <has_size min="10000"/>
104 </assert_contents>
105 </element>
106 <element name="test-sample2_telsignal_t2t-i002c-mat_chr12qq">
107 <assert_contents>
108 <has_size min="10000"/>
109 </assert_contents>
110 </element>
111 <element name="test-sample2_telsignal_t2t-i002c-mat_chr13pp">
112 <assert_contents>
113 <has_size min="10000"/>
114 </assert_contents>
115 </element>
116 <element name="test-sample2_telsignal_t2t-i002c-mat_chr13qq">
117 <assert_contents>
118 <has_size min="10000"/>
119 </assert_contents>
120 </element>
121 </output_collection>
122 </test>
123 <!-- Test 3: Telomere options: minimum telomere length of 1000 plus masked telomeres added as extra contigs -->
124 <test expect_num_outputs="1">
125 <param name="input_fasta" value="t2t_subset_with_telomeres.fa.gz"/>
126 <param name="sample_name" value="test-sample3"/>
127 <param name="contig_list" value="t2t-i002c-mat_chr11p,t2t-i002c-mat_chr11q,t2t-i002c-mat_chr12p,t2t-i002c-mat_chr12q,t2t-i002c-mat_chr13p,t2t-i002c-mat_chr13q"/>
128 <param name="min_tel_length" value="1000"/>
129 <param name="add_tel" value="true"/>
130 <output name="output_fasta">
131 <assert_contents>
132 <has_text text=">test-sample3"/>
133 <has_text text=">test-sample3_tel-"/>
134 <has_line_matching expression="^&gt;.*"/>
135 <has_line_matching expression="^[ACGTN]+$"/>
136 <has_size value="4066952" delta="100000"/>
137 </assert_contents>
138 </output>
139 </test>
140 </tests>
141 <help><![CDATA[
142 **What it does**
143
144 Telogator Make Reference creates a custom telogator reference database from a telomere-to-telomere (T2T) reference genome assembly. This tool is essential for analyzing telomeres in non-human organisms or custom genome assemblies.
145
146 The tool performs the following steps:
147
148 1. Reads the input T2T reference FASTA file
149 2. Identifies telomeric sequences at contig ends
150 3. Optionally filters and remaps contigs
151 4. Creates a processed reference suitable for telogator analysis
152 5. Generates an index file (.fai) for the reference
153 6. Optionally generates visualization plots of telomere signals
154
155 **When to use this tool**
156
157 Use this tool when you need to:
158
159 - Analyze telomeres in non-human organisms (e.g., mouse, maize, other species)
160 - Work with custom or newly assembled T2T genomes
161 - Create a reference from alternative human T2T assemblies (T2T-yao, T2T-cn1, etc.)
162 - Prepare references with specific contig selections or naming conventions
163
164 **Inputs**
165
166 - **T2T reference FASTA**: A telomere-to-telomere reference genome assembly
167 - **Sample name**: Identifier prepended to contig names (use organism/assembly name)
168 - **Contig list**: Comma-delimited list of contigs to include (defaults to all human chromosomes)
169 - **Telomere kmers file** (optional): Custom telomere repeat patterns for non-human organisms
170 - **Minimum telomere length**: Filter contigs by minimum telomere length at ends
171
172 **Outputs**
173
174 1. **Reference FASTA**: Processed telogator reference file ready for use with telogator
175 2. **Reference index (.fai)**: Index file for the created reference FASTA (written during the run; this wrapper does not return it as a separate Galaxy dataset)
176 3. **Telomere signal plots** (optional): PNG plots showing telomere signals for each chromosome arm
177
178 **Important Notes**
179
180 - The input FASTA should be a high-quality T2T assembly with telomeres at contig ends
181 - The sample name should be descriptive (e.g., organism name, assembly version) and may contain only letters, digits, and hyphens (no underscores)
182 - The contig list defaults to human chromosomes; modify it for other organisms or custom assemblies
183 - For non-human organisms, provide a telomere kmers file matching the species' telomere repeats
184
185 ]]></help>
186 <expand macro="citations"/>
187 </tool>