comparison chromap.xml @ 0:61fa9655ab32 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/chromap commit 392fc1bebfff21996c13ba0edb952b5f3784cca2
author iuc
date Tue, 17 Feb 2026 19:09:08 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:61fa9655ab32
1 <tool id="chromap" name="chromap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Fast alignment and preprocessing of chromatin profiles</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[
8 ## Step 1: Build index from reference FASTA (written to chromap_index). NOTE(review): -k/-w are always passed, also when --min-frag-length is set; confirm which one chromap honours.
9 chromap
10 -i
11 -r '$input_options.ref'
12 -o chromap_index
13 -k $index_options.kmer
14 -w $index_options.window
15 #if str($index_options.min_frag_length) != ''
16 --min-frag-length $index_options.min_frag_length
17 #end if
18 &&
19 ## Optional numeric parameters are tested with str(...) != '' so that an explicit 0 is passed on instead of being treated as unset.
20 ## Step 2: Map reads using built index
21 chromap
22 --preset $mapping_options.preset
23 #if $input_options.read_type.input_reads_type == 'single'
24 #set reads = $input_options.read_type.single_read
25 -1 #echo ','.join(["'%s'" % f for f in str($reads).split(',')])#
26 #else
27 -1 '$input_options.read_type.paired_collection.forward'
28 -2 '$input_options.read_type.paired_collection.reverse'
29 #end if
30 ## Single-end: every selected dataset is quoted on its own and all of them go to one -1 as a comma-separated list.
31 ## --- Reference and index ---
32 -r '$input_options.ref'
33 -x chromap_index
34
35 ## --- Optional barcode inputs ---
36 #if $input_options.barcode
37 -b '$input_options.barcode'
38 #end if
39 #if $input_options.barcode_whitelist
40 --barcode-whitelist '$input_options.barcode_whitelist'
41 #end if
42 #if $input_options.read_format
43 --read-format '$input_options.read_format'
44 #end if
45 #if $input_options.barcode_translate
46 --barcode-translate '$input_options.barcode_translate'
47 #end if
48
49 ## --- Mapping options ---
50 $mapping_options.split_alignment
51 --error-threshold $mapping_options.error_threshold
52 --min-num-seeds $mapping_options.min_num_seeds
53 #if $mapping_options.max_seed_frequencies
54 --max-seed-frequencies '$mapping_options.max_seed_frequencies'
55 #end if
56 --max-insert-size $mapping_options.max_insert_size
57 --MAPQ-threshold $mapping_options.MAPQ_threshold
58 --min-read-length $mapping_options.min_read_length
59 $mapping_options.trim_adapters
60 $mapping_options.Tn5_shift
61 #if str($mapping_options.bc_error_threshold) != ''
62 --bc-error-threshold $mapping_options.bc_error_threshold
63 #end if
64 #if str($mapping_options.bc_probability_threshold) != ''
65 --bc-probability-threshold $mapping_options.bc_probability_threshold
66 #end if
67 #if $mapping_options.chr_order
68 --chr-order '$mapping_options.chr_order'
69 #end if
70 #if $mapping_options.pairs_natural_chr_order
71 --pairs-natural-chr-order '$mapping_options.pairs_natural_chr_order'
72 #end if
73
74 ## --- Output format ---
75 $output_options.out_format
76 #if $output_options.summary
77 --summary '$summary_out'
78 #end if
79 -t "\${GALAXY_SLOTS:-8}"
80 -o '$mapping_out'
81
82 ]]></command>
83 <inputs>
84 <!-- Input options: reads (single-end datasets or one paired collection), the reference FASTA and the optional barcode inputs -->
85 <section name="input_options" title="Input options" expanded="true">
86 <conditional name="read_type">
87 <param name="input_reads_type" type="select" label="Select the Input read type">
88 <option value="single" selected="true">Single-end</option>
89 <option value="paired">Paired-end collection</option>
90 </param>
91 <when value="single">
92 <param name="single_read" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" multiple="true" label="Single Read"/>
93 </when>
94 <when value="paired">
95 <param name="paired_collection" type="data_collection" collection_type="paired" label="Paired reads collection" help="Select a paired collection containing forward and reverse reads."/>
96 </when>
97 </conditional>
98 <param argument="--ref" type="data" format="fasta" label="Reference (FASTA)"/>
99 <param argument="--barcode" type="data" format="fastq,fastq.gz" optional="true" label="Barcode file"/>
100 <param argument="--barcode-whitelist" type="data" format="txt" optional="true" label="Barcode whitelist file"/>
101 <param argument="--read-format" type="text" optional="true" label="Read/barcode format string" help='Example: "r1:0:-1,bc:0:-1" (10x single-end)'/>
102 <param argument="--barcode-translate" type="data" format="tabular" optional="true" label="Barcode translate file"/>
103 </section>
104
105 <!-- Indexing options: consumed only by the first chromap call (-i), which builds the index from the reference -->
106 <section name="index_options" title="Indexing options" expanded="false">
107 <param argument="--min-frag-length" type="integer" optional="true" value="30" label="Min fragment length for choosing kmer length and window automatically" help="chromap --min-frag-length (default 30)"/>
108 <param argument="--kmer" type="integer" value="17" label="K-mer length"/>
109 <param argument="--window" type="integer" value="7" label="Window size"/>
110 </section>
111
112 <!-- Mapping options: forwarded to the second (read mapping) chromap call; boolean params expand to their flag or to nothing -->
113 <section name="mapping_options" title="Mapping" expanded="false">
114 <param argument="--preset" type="select" label="Preset" help="Preset parameters for mapping reads">
115 <option value="atac">atac (ATAC-seq/scATAC-seq)</option>
116 <option value="chip">chip (ChIP-seq)</option>
117 <option value="hic">hic (Hi-C)</option>
118 </param>
119 <param argument="--split-alignment" type="boolean" label="Allow split alignments" truevalue="--split-alignment" falsevalue="" checked="false"/>
120 <param argument="--error-threshold" type="integer" value="8" label="Max errors allowed"/>
121 <param argument="--min-num-seeds" type="integer" value="2" label="Min number of seeds"/>
122 <param argument="--max-seed-frequencies" type="text" optional="true" value="500,1000" label="Max seed frequencies" help="Comma-separated (default 500,1000)"/>
123 <param argument="--max-insert-size" type="integer" value="1000" label="Max insert size (only for paired-end read mapping)"/>
124 <param argument="--MAPQ-threshold" type="integer" value="30" min="0" max="60" label="Min MAPQ (-q)"/>
125 <param argument="--min-read-length" type="integer" value="30" label="Minimum read length"/>
126 <param argument="--trim-adapters" type="boolean" label="Trim adapters on 3' (--trim-adapters)" truevalue="--trim-adapters" falsevalue="" checked="false"/>
127 <param argument="--Tn5-shift" type="boolean" label="Perform Tn5 shift" truevalue="--Tn5-shift" falsevalue="" checked="false"/>
128 <param argument="--bc-error-threshold" type="integer" optional="true" value="1" min="0" label="Barcode error threshold"/>
129 <param argument="--bc-probability-threshold" type="float" optional="true" value="0.9" min="0" max="1" label="Barcode probability threshold"/>
130 <param argument="--chr-order" type="data" format="tabular" optional="true" label="Custom chromosome order"/>
131 <param argument="--pairs-natural-chr-order" type="data" format="tabular" optional="true" label="Chrom order for pairs flipping"/>
132 <!-- The barcode thresholds are range-checked: an error count cannot be negative and a probability lies in [0, 1] -->
133 </section>
134
135 <!-- Output options: the out_format values are the literal flags inserted into the command; summary controls whether the summary output is produced -->
136 <section name="output_options" title="Output" expanded="true">
137 <param name="out_format" type="select" label="Output format">
138 <option value="--SAM">SAM</option>
139 <option value="--BED" selected="true">BED/BEDPE</option>
140 <option value="--TagAlign">TagAlign/PairedTagAlign</option>
141 <option value="--pairs">4dn pairs</option>
142 </param>
143 <param name="summary" type="boolean" label="Produce summary file" truevalue="--summary" falsevalue="" checked="true"/>
144 </section>
145 </inputs>
146
147 <outputs>
148 <!-- Primary mapping output: declared as bed and re-typed by change_format according to out_format. The summary output below only exists when the summary option is checked. -->
149 <data name="mapping_out" format="bed" label="${tool.name} on ${on_string}: Mapping output">
150 <change_format>
151 <when input="output_options.out_format" value="--SAM" format="sam"/>
152 <when input="output_options.out_format" value="--BED" format="bed"/>
153 <when input="output_options.out_format" value="--TagAlign" format="tabular"/>
154 <when input="output_options.out_format" value="--pairs" format="4dn_pairs"/>
155 </change_format>
156 </data>
157 <data name="summary_out" format="txt" label="${tool.name} on ${on_string}: Summary">
158 <filter>output_options['summary']</filter>
159 </data>
160 </outputs>
161
162 <tests>
163 <!-- Test 1: Paired-end ChIP-seq (preset chip), BED output, with summary; both outputs are compared against expected files. -->
164 <test expect_num_outputs="2">
165 <section name="input_options">
166 <conditional name="read_type">
167 <param name="input_reads_type" value="paired"/>
168 <param name="paired_collection">
169 <collection type="paired">
170 <element name="forward" value="read1.fq"/>
171 <element name="reverse" value="read2.fq"/>
172 </collection>
173 </param>
174 </conditional>
175 <param name="ref" value="ref.fa" ftype="fasta"/>
176 </section>
177 <section name="index_options">
178 <param name="kmer" value="17"/>
179 <param name="window" value="7"/>
180 </section>
181 <section name="mapping_options">
182 <param name="preset" value="chip"/>
183 <param name="split_alignment" value="false"/>
184 <param name="error_threshold" value="8"/>
185 <param name="min_num_seeds" value="2"/>
186 <param name="max_insert_size" value="1000"/>
187 <param name="MAPQ_threshold" value="30"/>
188 <param name="min_read_length" value="30"/>
189 <param name="trim_adapters" value="false"/>
190 <param name="Tn5_shift" value="false"/>
191 </section>
192 <section name="output_options">
193 <param name="out_format" value="--BED"/>
194 <param name="summary" value="true"/>
195 </section>
196 <output name="mapping_out" file="test01_mapping.bed" ftype="bed"/>
197 <output name="summary_out" file="test01_summary.txt" ftype="txt"/>
198 </test>
199
200 <!-- Test 2: Single-end ATAC-seq, SAM output, Tn5 shift and adapter trimming enabled, MAPQ threshold 0, no summary. -->
201 <test expect_num_outputs="1">
202 <section name="input_options">
203 <conditional name="read_type">
204 <param name="input_reads_type" value="single"/>
205 <param name="single_read" value="read1_se.fq"/>
206 </conditional>
207 <param name="ref" value="ref.fa" ftype="fasta"/>
208 </section>
209 <section name="index_options">
210 <param name="kmer" value="17"/>
211 <param name="window" value="7"/>
212 </section>
213 <section name="mapping_options">
214 <param name="preset" value="atac"/>
215 <param name="split_alignment" value="false"/>
216 <param name="error_threshold" value="8"/>
217 <param name="min_num_seeds" value="2"/>
218 <param name="max_insert_size" value="1000"/>
219 <param name="MAPQ_threshold" value="0"/>
220 <param name="min_read_length" value="30"/>
221 <param name="trim_adapters" value="true"/>
222 <param name="Tn5_shift" value="true"/>
223 </section>
224 <section name="output_options">
225 <param name="out_format" value="--SAM"/>
226 <param name="summary" value="false"/>
227 </section>
228 <output name="mapping_out" file="test02_mapping.sam" ftype="sam"/>
229 </test>
230
231 <!-- Test 3: Paired-end Hi-C (preset hic), TagAlign output typed as tabular, split alignments on, MAPQ threshold 0, no summary. -->
232 <test expect_num_outputs="1">
233 <section name="input_options">
234 <conditional name="read_type">
235 <param name="input_reads_type" value="paired"/>
236 <param name="paired_collection">
237 <collection type="paired">
238 <element name="forward" value="read1.fq"/>
239 <element name="reverse" value="read2.fq"/>
240 </collection>
241 </param>
242 </conditional>
243 <param name="ref" value="ref.fa" ftype="fasta"/>
244 </section>
245 <section name="index_options">
246 <param name="kmer" value="17"/>
247 <param name="window" value="7"/>
248 </section>
249 <section name="mapping_options">
250 <param name="preset" value="hic"/>
251 <param name="split_alignment" value="true"/>
252 <param name="error_threshold" value="8"/>
253 <param name="min_num_seeds" value="2"/>
254 <param name="max_insert_size" value="1000"/>
255 <param name="MAPQ_threshold" value="0"/>
256 <param name="min_read_length" value="30"/>
257 <param name="trim_adapters" value="false"/>
258 <param name="Tn5_shift" value="false"/>
259 </section>
260 <section name="output_options">
261 <param name="out_format" value="--TagAlign"/>
262 <param name="summary" value="false"/>
263 </section>
264 <output name="mapping_out" file="test03_mapping.tsv" ftype="tabular"/>
265 </test>
266
267 <!-- Test 4: Paired-end Hi-C (preset hic), 4DN pairs output, max insert size raised to 2000, MAPQ threshold 0, no summary. -->
268 <test expect_num_outputs="1">
269 <section name="input_options">
270 <conditional name="read_type">
271 <param name="input_reads_type" value="paired"/>
272 <param name="paired_collection">
273 <collection type="paired">
274 <element name="forward" value="read1.fq"/>
275 <element name="reverse" value="read2.fq"/>
276 </collection>
277 </param>
278 </conditional>
279 <param name="ref" value="ref.fa" ftype="fasta"/>
280 </section>
281 <section name="index_options">
282 <param name="kmer" value="17"/>
283 <param name="window" value="7"/>
284 </section>
285 <section name="mapping_options">
286 <param name="preset" value="hic"/>
287 <param name="split_alignment" value="false"/>
288 <param name="error_threshold" value="8"/>
289 <param name="min_num_seeds" value="2"/>
290 <param name="max_insert_size" value="2000"/>
291 <param name="MAPQ_threshold" value="0"/>
292 <param name="min_read_length" value="30"/>
293 <param name="trim_adapters" value="false"/>
294 <param name="Tn5_shift" value="false"/>
295 </section>
296 <section name="output_options">
297 <param name="out_format" value="--pairs"/>
298 <param name="summary" value="false"/>
299 </section>
300 <output name="mapping_out" file="test04_mapping.pairs" ftype="4dn_pairs"/>
301 </test>
302
303 <!-- Test 5: Single-end scATAC with barcode file, whitelist and an explicit read-format string; BED output plus summary. -->
304 <test expect_num_outputs="2">
305 <section name="input_options">
306 <conditional name="read_type">
307 <param name="input_reads_type" value="single"/>
308 <param name="single_read" value="read1_se.fq"/>
309 </conditional>
310 <param name="ref" value="ref.fa" ftype="fasta"/>
311 <param name="barcode" value="barcode.fq"/>
312 <param name="barcode_whitelist" value="whitelist.txt"/>
313 <param name="read_format" value="r1:0:-1,bc:0:-1"/>
314 </section>
315 <section name="index_options">
316 <param name="kmer" value="17"/>
317 <param name="window" value="7"/>
318 </section>
319 <section name="mapping_options">
320 <param name="preset" value="atac"/>
321 <param name="split_alignment" value="false"/>
322 <param name="error_threshold" value="8"/>
323 <param name="min_num_seeds" value="2"/>
324 <param name="max_insert_size" value="1000"/>
325 <param name="MAPQ_threshold" value="0"/>
326 <param name="min_read_length" value="30"/>
327 <param name="trim_adapters" value="false"/>
328 <param name="Tn5_shift" value="false"/>
329 <param name="bc_error_threshold" value="1"/>
330 <param name="bc_probability_threshold" value="0.9"/>
331 </section>
332 <section name="output_options">
333 <param name="out_format" value="--BED"/>
334 <param name="summary" value="true"/>
335 </section>
336 <output name="mapping_out" file="test05_mapping.bed" ftype="bed"/>
337 <output name="summary_out" file="test05_summary.txt" ftype="txt"/>
338 </test>
339
340 <!-- Test 6: Single-end ATAC, relaxed MAPQ (threshold=0), custom kmer/window (15/5), BED output, no summary. -->
341 <test expect_num_outputs="1">
342 <section name="input_options">
343 <conditional name="read_type">
344 <param name="input_reads_type" value="single"/>
345 <param name="single_read" value="read1_se.fq"/>
346 </conditional>
347 <param name="ref" value="ref.fa" ftype="fasta"/>
348 </section>
349 <section name="index_options">
350 <param name="kmer" value="15"/>
351 <param name="window" value="5"/>
352 </section>
353 <section name="mapping_options">
354 <param name="preset" value="atac"/>
355 <param name="split_alignment" value="false"/>
356 <param name="error_threshold" value="8"/>
357 <param name="min_num_seeds" value="2"/>
358 <param name="max_insert_size" value="1000"/>
359 <param name="MAPQ_threshold" value="0"/>
360 <param name="min_read_length" value="30"/>
361 <param name="trim_adapters" value="false"/>
362 <param name="Tn5_shift" value="false"/>
363 </section>
364 <section name="output_options">
365 <param name="out_format" value="--BED"/>
366 <param name="summary" value="false"/>
367 </section>
368 <output name="mapping_out" file="test06_mapping.bed" ftype="bed"/>
369 </test>
370 </tests>
371
372 <help><![CDATA[
373
374 **chromap** is a fast aligner and preprocessor for chromatin profiling data (ATAC-seq, ChIP-seq, Hi-C and their single-cell variants).
375
376 -----
377
378 **Inputs**
379
380 *Reads* : Provide either single-end FASTQ files or a paired-end collection. Multiple single-end files can be selected and will be processed together.
381
382 *Reference* : A reference genome in FASTA format. The index is built automatically — no separate indexing step is needed.
383
384 *Barcode file* (optional) : For single-cell experiments, provide a FASTQ file containing cell barcode sequences. Use the **Read/barcode format string** to describe how reads and barcodes are distributed across files. The default ``r1:0:-1,bc:0:-1`` corresponds to 10x Genomics single-end layout.
385
386 *Barcode whitelist* (optional) : A plain-text file of known valid barcodes (one per line). Barcodes not in the list will be corrected if within the Hamming distance set by **Barcode error threshold**. Without a whitelist, all barcodes are passed through uncorrected.
387
388 -----
389
390 **Preset**
391
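392 Presets load recommended parameter bundles for each assay type. They are applied first; any parameter you set explicitly will override the preset value. Note that this tool always passes **Max errors**, **Min number of seeds**, **Max insert size**, **Min MAPQ** and **Minimum read length** to chromap explicitly, so for these parameters the values shown in the *Mapping* section take effect regardless of the selected preset.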
393
394 - *atac* - ATAC-seq / scATAC-seq
395 - *chip* - ChIP-seq
396 - *hic* - Hi-C
397
398 -----
399
400 **Indexing options**
401
402 These control the minimiser index built from the reference before mapping.
403
404 - **K-mer length** (default 17) and **Window size** (default 7) together determine index density and sensitivity. Shorter k-mers or smaller windows increase sensitivity at the cost of speed and memory.
405 - **Min fragment length** : if set, chromap automatically chooses k and w to suit the expected fragment size, ignoring the manual values above.
406
407 -----
408
409 **Key mapping parameters**
410
411 - **Tn5 shift** : shifts read 5′ ends by +4 bp (forward) or −5 bp (reverse) to centre on the Tn5 insertion site. Enable this for ATAC-seq when calling peaks with MACS2 or similar tools.
412 - **Trim adapters** : detects and removes 3′ adapter sequence before alignment. Useful when reads extend beyond short inserts.
413 - **Split alignments** : allows a read to align as two separate segments. Required for Hi-C reads spanning a ligation junction.
414 - **Min MAPQ** (default 30) : alignments below this mapping quality are excluded from the output. Set to 0 to retain all alignments.
415 - **Max insert size** (default 1000) : paired-end only. Read pairs with an inferred insert size above this value are not reported.
416 - **Max errors** (default 8) : maximum mismatches/indels allowed in a reported alignment.
417 - **Max seed frequencies** (default ``500,1000``) : seeds found more often than these thresholds are skipped as repetitive. Reducing these values speeds up mapping in repetitive genomes at the cost of sensitivity.
418
419 -----
420
421
422 **Output formats**
423
424 Based on the selected options, output can be: SAM, BED / BEDPE, TagAlign, or 4DN pairs format.
425
426 *Summary file* : when enabled, produces a CSV with per-barcode (or bulk) alignment statistics including total reads, duplicates, unmapped, low-MAPQ counts, and an estimated FRiP score.
427
428
429 -----
430
431 **Tips**
432
433 - For bulk ATAC-seq peak calling, use the ``atac`` preset with **Tn5 shift** enabled and **BED** output.
434 - For scATAC-seq, add a barcode file and whitelist; the summary CSV will contain one row per cell barcode.
435 - For Hi-C contact matrix generation, use the ``hic`` preset with **4DN pairs** output and enable **Split alignments**.
436
437
438 ]]></help>
439 <expand macro="citations"/>
440 <expand macro="creator"/>
441 </tool>