Mercurial > repos > iuc > telogator
comparison telogator_make_ref.xml @ 0:afcb889cbce3 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/telogator2 commit ff18f7a9e15883099ec1cd699533658a280dcf12
| author | iuc |
|---|---|
| date | Thu, 04 Dec 2025 17:09:38 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:afcb889cbce3 |
|---|---|
| 1 <tool id="telogator_make_ref" name="Telogator Make Reference" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> | |
| 2 <description>Create custom telogator reference from a T2T assembly</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="edam_ontology"/> | |
| 7 <expand macro="xrefs"/> | |
| 8 <expand macro="requirements"/> | |
| 9 <expand macro="version_command"/> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 #import re | |
| 12 #set $identifier = str($input_fasta.element_identifier) | |
| 13 #set $safe_name = re.sub('[^\w\-\.]', '_', $identifier) | |
| 14 #if $input_fasta.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz')) | |
| 15 #set $safe_name = $safe_name + '.fa.gz' | |
| 16 #elif $input_fasta.is_of_type('fasta') and not ($safe_name.endswith('.fa') or $safe_name.endswith('.fasta')) | |
| 17 #set $safe_name = $safe_name + '.fa' | |
| 18 #end if | |
| 19 mkdir -p output_dir && | |
| 20 ln -sf '${input_fasta}' '${safe_name}' && | |
| 21 make_telogator_ref | |
| 22 -i '${safe_name}' | |
| 23 -o output_dir/output_ref.fa | |
| 24 -s '${sample_name}' | |
| 25 -c '${contig_list}' | |
| 26 ## Optional k-mer file: pass -k only when a dataset was supplied; per the parameter help, the built-in human telomere k-mers are used otherwise | |
| 27 #if $kmer_file | |
| 28 -k '${kmer_file}' | |
| 29 #end if | |
| 30 ## Minimum telomere length required at contig ends, in base pairs (parameter default is 0) | |
| 31 -m '${min_tel_length}' | |
| 32 ## Optional boolean flags: each expands to its truevalue (--add-tel, --plot) when checked and to an empty string otherwise | |
| 33 ${add_tel} | |
| 34 ${plot} | |
| 35 ## Move the reference FASTA onto the Galaxy output dataset; PNG plots are left in output_dir, where discover_datasets looks for them | |
| 36 && mv output_dir/output_ref.fa '${output_fasta}' | |
| 37 ]]></command> | |
| 38 <inputs> | |
| 39 <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Input T2T reference FASTA" help="Telomere-to-telomere reference genome assembly in FASTA format (gzipped supported)"/> | |
| 40 <param name="sample_name" argument="-s" type="text" value="sample" label="Sample name" help="Sample name to prepend to contig identifiers in the output"> | |
| 41 <validator type="regex" message="Sample name must contain only alphanumeric characters and hyphens">^[a-zA-Z0-9-]+$</validator> | |
| 42 </param> | |
| 43 <param name="contig_list" argument="-c" type="text" value="chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY" label="List of contigs" help="Comma-delimited list of contigs to include. Default is all human chromosomes."> | |
| 44 <validator type="empty_field"/> | |
| 45 <sanitizer> <!-- The value is interpolated inside single quotes in the command line, so the apostrophe must not be a valid character; the double quote stays excluded as before. --> | |
| 46 <valid initial="string.printable"> | |
| 47 <remove value="&quot;"/><remove value="&apos;"/> | |
| 48 </valid> | |
| 49 </sanitizer> | |
| 50 </param> | |
| 51 <param name="kmer_file" argument="-k" type="data" format="tsv" optional="true" value="" label="Telomere kmers file" help="Optional telomere k-mers file. If omitted, a built-in human telomere k-mers file is used."/> | |
| 52 <param name="min_tel_length" argument="-m" type="integer" value="0" min="0" label="Minimum telomere length" help="Minimum telomere length required at contig ends (in base pairs)"/> | |
| 53 <param name="add_tel" type="boolean" truevalue="--add-tel" falsevalue="" checked="false" label="Include masked telomeres" help="Include masked telomeres as separate contigs in the output"/> | |
| 54 <param name="plot" type="boolean" truevalue="--plot" falsevalue="" checked="false" label="Generate telomere signal plots" help="Generate PNG plots showing telomere signals for each chromosome arm"/> | |
| 55 </inputs> | |
| 56 <outputs> | |
| 57 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: Reference FASTA"/> | |
| 58 <collection name="plots" type="list" label="${tool.name} on ${on_string}: Telomere signal plots"> | |
| 59 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.png$" directory="output_dir" format="png"/> <!-- angle brackets of the named group are escaped so the attribute value is well-formed XML --> | |
| 60 <filter>plot</filter> <!-- the collection is only created when plotting was requested --> | |
| 61 </collection> | |
| 62 </outputs> | |
| 63 <tests> | |
| 64 <!-- Test 1: basic usage on a gzipped FASTA, setting only the sample name and contig list; checks FASTA structure and approximate size, and that no separate telomere contigs (test-sample1_tel-) are emitted when add_tel is left unchecked --> | |
| 65 <test expect_num_outputs="1"> | |
| 66 <param name="input_fasta" value="t2t_subset_with_telomeres.fa.gz"/> | |
| 67 <param name="sample_name" value="test-sample1"/> | |
| 68 <param name="contig_list" value="t2t-i002c-mat_chr11p,t2t-i002c-mat_chr11q,t2t-i002c-mat_chr12p,t2t-i002c-mat_chr12q,t2t-i002c-mat_chr13p,t2t-i002c-mat_chr13q"/> | |
| 69 <output name="output_fasta"> | |
| 70 <assert_contents> | |
| 71 <has_text text=">test-sample"/> | |
| 72 <has_line_matching expression="^>.*"/> | |
| 73 <has_line_matching expression="^[ACGTN]+$"/> | |
| 74 <has_size value="6100428" delta="100000"/> | |
| 75 <not_has_text text=">test-sample1_tel-"/> | |
| 76 </assert_contents> | |
| 77 </output> | |
| 78 </test> | |
| 79 <!-- Test 2: plot generation enabled; expects the reference FASTA plus a list collection holding one PNG of at least 10000 bytes for each of the six listed contigs --> | |
| 80 <test expect_num_outputs="2"> | |
| 81 <param name="input_fasta" value="t2t_subset_with_telomeres.fa.gz"/> | |
| 82 <param name="sample_name" value="test-sample2"/> | |
| 83 <param name="plot" value="true"/> | |
| 84 <param name="contig_list" value="t2t-i002c-mat_chr11p,t2t-i002c-mat_chr11q,t2t-i002c-mat_chr12p,t2t-i002c-mat_chr12q,t2t-i002c-mat_chr13p,t2t-i002c-mat_chr13q"/> | |
| 85 <output name="output_fasta"> | |
| 86 <assert_contents> | |
| 87 <has_text text=">test-sample2"/> | |
| 88 </assert_contents> | |
| 89 </output> | |
| 90 <output_collection name="plots" type="list"> | |
| 91 <element name="test-sample2_telsignal_t2t-i002c-mat_chr11pp"> | |
| 92 <assert_contents> | |
| 93 <has_size min="10000"/> | |
| 94 </assert_contents> | |
| 95 </element> | |
| 96 <element name="test-sample2_telsignal_t2t-i002c-mat_chr11qq"> | |
| 97 <assert_contents> | |
| 98 <has_size min="10000"/> | |
| 99 </assert_contents> | |
| 100 </element> | |
| 101 <element name="test-sample2_telsignal_t2t-i002c-mat_chr12pp"> | |
| 102 <assert_contents> | |
| 103 <has_size min="10000"/> | |
| 104 </assert_contents> | |
| 105 </element> | |
| 106 <element name="test-sample2_telsignal_t2t-i002c-mat_chr12qq"> | |
| 107 <assert_contents> | |
| 108 <has_size min="10000"/> | |
| 109 </assert_contents> | |
| 110 </element> | |
| 111 <element name="test-sample2_telsignal_t2t-i002c-mat_chr13pp"> | |
| 112 <assert_contents> | |
| 113 <has_size min="10000"/> | |
| 114 </assert_contents> | |
| 115 </element> | |
| 116 <element name="test-sample2_telsignal_t2t-i002c-mat_chr13qq"> | |
| 117 <assert_contents> | |
| 118 <has_size min="10000"/> | |
| 119 </assert_contents> | |
| 120 </element> | |
| 121 </output_collection> | |
| 122 </test> | |
| 123 <!-- Test 3: minimum telomere length of 1000 combined with add_tel; checks FASTA structure and approximate size, and that separate telomere contigs (test-sample3_tel-) are present in the output --> | |
| 124 <test expect_num_outputs="1"> | |
| 125 <param name="input_fasta" value="t2t_subset_with_telomeres.fa.gz" /> | |
| 126 <param name="sample_name" value="test-sample3"/> | |
| 127 <param name="min_tel_length" value="1000"/> | |
| 128 <param name="add_tel" value="true"/> | |
| 129 <param name="contig_list" value="t2t-i002c-mat_chr11p,t2t-i002c-mat_chr11q,t2t-i002c-mat_chr12p,t2t-i002c-mat_chr12q,t2t-i002c-mat_chr13p,t2t-i002c-mat_chr13q"/> | |
| 130 <output name="output_fasta"> | |
| 131 <assert_contents> | |
| 132 <has_text text=">test-sample3"/> | |
| 133 <has_line_matching expression="^>.*"/> | |
| 134 <has_line_matching expression="^[ACGTN]+$"/> | |
| 135 <has_size value="4066952" delta="100000"/> | |
| 136 <has_text text=">test-sample3_tel-"/> | |
| 137 </assert_contents> | |
| 138 </output> | |
| 139 </test> | |
| 140 </tests> | |
| 141 <help><![CDATA[ | |
| 142 **What it does** | |
| 143 | |
| 144 Telogator Make Reference creates a custom telogator reference database from a telomere-to-telomere (T2T) reference genome assembly. This tool is essential for analyzing telomeres in non-human organisms or custom genome assemblies. | |
| 145 | |
| 146 The tool performs the following steps: | |
| 147 | |
| 148 1. Reads the input T2T reference FASTA file | |
| 149 2. Identifies telomeric sequences at contig ends | |
| 150 3. Optionally filters and remaps contigs | |
| 151 4. Creates a processed reference suitable for telogator analysis | |
| 152 5. Generates an index file (.fai) for the reference | |
| 153 6. Optionally generates visualization plots of telomere signals | |
| 154 | |
| 155 **When to use this tool** | |
| 156 | |
| 157 Use this tool when you need to: | |
| 158 | |
| 159 - Analyze telomeres in non-human organisms (e.g., mouse, maize, other species) | |
| 160 - Work with custom or newly assembled T2T genomes | |
| 161 - Create a reference from alternative human T2T assemblies (T2T-yao, T2T-cn1, etc.) | |
| 162 - Prepare references with specific contig selections or naming conventions | |
| 163 | |
| 164 **Inputs** | |
| 165 | |
| 166 - **T2T reference FASTA**: A telomere-to-telomere reference genome assembly | |
| 167 - **Sample name**: Identifier prepended to contig names (use organism/assembly name) | |
| 168 - **Contig list**: Comma-delimited list of contigs to include (defaults to all human chromosomes) | |
| 169 - **Telomere kmers file** (optional): Custom telomere repeat patterns for non-human organisms | |
| 170 - **Minimum telomere length**: Filter contigs by minimum telomere length at ends | |
| 171 | |
| 172 **Outputs** | |
| 173 | |
| 174 1. **Reference FASTA**: Processed telogator reference file ready for use with telogator | |
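| 175 2. **Reference index (.fai)**: Index file for the created reference FASTA (note: this Galaxy wrapper currently declares only the reference FASTA and the optional plots as outputs, so the index does not appear as a separate history dataset) | |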
| 176 3. **Telomere signal plots** (optional): PNG plots showing telomere signals for each chromosome arm | |
| 177 | |
| 178 **Important Notes** | |
| 179 | |
| 180 - The input FASTA should be a high-quality T2T assembly with telomeres at contig ends | |
| 181 - The sample name should be descriptive (e.g., organism name, assembly version) and may contain only alphanumeric characters and hyphens; underscores are not allowed | |
| 182 - The contig list defaults to human chromosomes; modify it for other organisms or custom assemblies | |
| 183 - For non-human organisms, provide a telomere kmers file matching the species' telomere repeats | |
| 184 | |
| 185 ]]></help> | |
| 186 <expand macro="citations"/> | |
| 187 </tool> |
