changeset 0:61fa9655ab32 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/chromap commit 392fc1bebfff21996c13ba0edb952b5f3784cca2
author iuc
date Tue, 17 Feb 2026 19:09:08 +0000
parents
children
files chromap.xml macros.xml test-data/barcode.fq test-data/read1.fq test-data/read1_se.fq test-data/read2.fq test-data/ref.fa test-data/test01_mapping.bed test-data/test01_summary.txt test-data/test02_mapping.sam test-data/test03_mapping.tsv test-data/test04_mapping.pairs test-data/test05_mapping.bed test-data/test05_summary.txt test-data/test06_mapping.bed test-data/whitelist.txt
diffstat 16 files changed, 689 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromap.xml	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,441 @@
+<tool id="chromap" name="chromap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Fast alignment and preprocessing of chromatin profiles</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+    ## Step 1: Build index from reference FASTA (optional numerics are compared against '' so an explicit 0 is still passed)
+    chromap
+        -i
+        -r '$input_options.ref'
+        -o chromap_index
+        -k $index_options.kmer
+        -w $index_options.window
+        #if str($index_options.min_frag_length) != ''
+            --min-frag-length $index_options.min_frag_length
+        #end if
+    &&
+
+    ## Step 2: Map reads using built index (several single-end files go to -1 as ONE comma-separated list)
+    chromap
+        --preset $mapping_options.preset
+        #if $input_options.read_type.input_reads_type == 'single'
+            #set reads = $input_options.read_type.single_read
+            -1 #echo ','.join(["'%s'" % f for f in str($reads).split(',')])#
+        #else
+            -1 '$input_options.read_type.paired_collection.forward'
+            -2 '$input_options.read_type.paired_collection.reverse'
+        #end if
+
+        ## --- Reference and index (chromap_index is the file written by step 1) ---
+        -r '$input_options.ref'
+        -x chromap_index
+
+        ## --- Optional barcode inputs (each flag is emitted only when the input is set) ---
+        #if $input_options.barcode
+            -b '$input_options.barcode'
+        #end if
+        #if $input_options.barcode_whitelist
+            --barcode-whitelist '$input_options.barcode_whitelist'
+        #end if
+        #if $input_options.read_format
+            --read-format '$input_options.read_format'
+        #end if
+        #if $input_options.barcode_translate
+            --barcode-translate '$input_options.barcode_translate'
+        #end if
+
+        ## --- Mapping options (boolean params expand to their flag or to nothing) ---
+        $mapping_options.split_alignment
+        --error-threshold $mapping_options.error_threshold
+        --min-num-seeds $mapping_options.min_num_seeds
+        #if $mapping_options.max_seed_frequencies
+            --max-seed-frequencies '$mapping_options.max_seed_frequencies'
+        #end if
+        --max-insert-size $mapping_options.max_insert_size
+        --MAPQ-threshold $mapping_options.MAPQ_threshold
+        --min-read-length $mapping_options.min_read_length
+        $mapping_options.trim_adapters
+        $mapping_options.Tn5_shift
+        #if str($mapping_options.bc_error_threshold) != ''
+            --bc-error-threshold $mapping_options.bc_error_threshold
+        #end if
+        #if str($mapping_options.bc_probability_threshold) != ''
+            --bc-probability-threshold $mapping_options.bc_probability_threshold
+        #end if
+        #if $mapping_options.chr_order
+            --chr-order '$mapping_options.chr_order'
+        #end if
+        #if $mapping_options.pairs_natural_chr_order
+            --pairs-natural-chr-order '$mapping_options.pairs_natural_chr_order'
+        #end if
+
+        ## --- Output format (out_format holds the literal format flag) ---
+        $output_options.out_format
+        #if $output_options.summary
+            --summary '$summary_out'
+        #end if
+        -t "\${GALAXY_SLOTS:-8}"
+        -o '$mapping_out'
+    
+    ]]></command>
+    <inputs>
+        <!-- Input options: reads, reference FASTA and the optional barcode inputs -->
+        <section name="input_options" title="Input options" expanded="true">
+            <conditional name="read_type">
+                <param name="input_reads_type" type="select" label="Select the Input read type">
+                    <option value="single" selected="true">Single-end</option>
+                    <option value="paired">Paired-end collection</option>
+                </param>
+                <when value="single">
+                    <param name="single_read" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" multiple="true" label="Single Read"/>
+                </when>
+                <when value="paired">
+                    <param name="paired_collection" type="data_collection" collection_type="paired" label="Paired reads collection" help="Select a paired collection containing forward and reverse reads."/>
+                </when>
+            </conditional>
+            <param argument="--ref" type="data" format="fasta" label="Reference (FASTA)"/>
+            <param argument="--barcode" type="data" format="fastq,fastq.gz" label="Barcode file" optional="true"/>
+            <param argument="--barcode-whitelist" type="data" format="txt" label="Barcode whitelist file" optional="true"/>
+            <param argument="--read-format" type="text" optional="true" label="Read/barcode format string" help='Example: "r1:0:-1,bc:0:-1" (10x single-end)'/>
+            <param argument="--barcode-translate" type="data" format="tabular" label="Barcode translate file" optional="true"/>
+        </section>
+
+        <!-- Indexing options: consumed by the "chromap -i" index-building step; lower bounds reject non-positive sizes in the form -->
+        <section name="index_options" title="Indexing options" expanded="false">
+            <param argument="--min-frag-length" type="integer" optional="true" value="30" min="0" label="Min fragment length for choosing kmer length and window automatically" help="chromap --min-frag-length (default 30)"/>
+            <param argument="--kmer" type="integer" value="17" min="1" label="K-mer length"/>
+            <param argument="--window" type="integer" value="7" min="1" label="Window size"/>
+        </section>
+
+        <!-- Mapping options: consumed by the mapping step; numeric values are bounded so negatives never reach the command line -->
+        <section name="mapping_options" title="Mapping" expanded="false">
+            <param argument="--preset" type="select" label="Preset" help="Preset parameters for mapping reads">
+                <option value="atac">atac (ATAC-seq/scATAC-seq)</option>
+                <option value="chip">chip (ChIP-seq)</option>
+                <option value="hic">hic (Hi-C)</option>
+            </param>
+            <param argument="--split-alignment" type="boolean" label="Allow split alignments" truevalue="--split-alignment" falsevalue="" checked="false"/>
+            <param argument="--error-threshold" type="integer" value="8" min="0" label="Max errors allowed"/>
+            <param argument="--min-num-seeds" type="integer" value="2" min="0" label="Min number of seeds"/>
+            <param argument="--max-seed-frequencies" type="text" optional="true" value="500,1000" label="Max seed frequencies" help="Comma-separated(default 500,1000)"/>
+            <param argument="--max-insert-size" type="integer" value="1000" min="0" label="Max insert size (only for paired-end read mapping)"/>
+            <param argument="--MAPQ-threshold" type="integer" value="30" min="0" max="60" label="Min MAPQ (-q)"/>
+            <param argument="--min-read-length" type="integer" value="30" min="0" label="Minimum read length"/>
+            <param argument="--trim-adapters" type="boolean" label="Trim adapters on 3' (--trim-adapters)" truevalue="--trim-adapters" falsevalue="" checked="false"/>
+            <param argument="--Tn5-shift" type="boolean" label="Perform Tn5 shift" truevalue="--Tn5-shift" falsevalue="" checked="false"/>
+            <param argument="--bc-error-threshold" type="integer" optional="true" value="1" min="0" label="Barcode error threshold"/>
+            <param argument="--bc-probability-threshold" type="float" optional="true" value="0.9" min="0" max="1" label="Barcode probability threshold"/>
+            <param argument="--chr-order" type="data" format="tabular" label="Custom chromosome order" optional="true"/>
+            <param argument="--pairs-natural-chr-order" type="data" format="tabular" label="Chrom order for pairs flipping" optional="true"/>
+
+        </section>
+
+        <!-- Output options: out_format values are the literal chromap format flags; summary toggles the second output dataset -->
+        <section name="output_options" title="Output" expanded="true">
+            <param name="out_format" type="select" label="Output format">
+                <option value="--SAM">SAM</option>
+                <option value="--BED" selected="true">BED/BEDPE</option>
+                <option value="--TagAlign">TagAlign/PairedTagAlign</option>
+                <option value="--pairs">4dn pairs</option>
+            </param>
+            <param name="summary" type="boolean" label="Produce summary file" truevalue="--summary" falsevalue="" checked="true"/>
+        </section>
+    </inputs>
+
+    <outputs>
+        <!-- Primary mapping output: declared as bed, then re-typed by change_format to match output_options.out_format -->
+        <data name="mapping_out" format="bed" label="${tool.name} on ${on_string}: Mapping output">
+            <change_format>
+                <when input="output_options.out_format" value="--SAM" format="sam"/>
+                <when input="output_options.out_format" value="--BED" format="bed"/>
+                <when input="output_options.out_format" value="--TagAlign" format="tabular"/>
+                <when input="output_options.out_format" value="--pairs" format="4dn_pairs"/>
+            </change_format>
+        </data>
+        <data name="summary_out" format="txt" label="${tool.name} on ${on_string}: Summary">
+            <filter>output_options['summary']</filter>
+        </data>
+    </outputs>
+
+    <tests>
+    <!-- Test 1: paired-end collection, chip preset, default k-mer/window, BED output, summary enabled (two outputs) -->
+        <test expect_num_outputs="2">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="paired"/>
+                    <param name="paired_collection">
+                        <collection type="paired">
+                            <element name="forward" value="read1.fq"/>
+                            <element name="reverse" value="read2.fq"/>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="chip"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="30"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--BED"/>
+                <param name="summary" value="true"/>
+            </section>
+            <output name="mapping_out" file="test01_mapping.bed" ftype="bed"/>
+            <output name="summary_out" file="test01_summary.txt" ftype="txt"/>
+        </test>
+
+    <!-- Test 2: single-end, atac preset, SAM output, adapter trimming and Tn5 shift on, MAPQ threshold 0, no summary -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="single"/>
+                    <param name="single_read" value="read1_se.fq"/>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="atac"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="true"/>
+                <param name="Tn5_shift" value="true"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--SAM"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test02_mapping.sam" ftype="sam"/>
+        </test>
+
+    <!-- Test 3: paired-end collection, hic preset, TagAlign output, split alignments on, MAPQ threshold 0, no summary -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="paired"/>
+                    <param name="paired_collection">
+                        <collection type="paired">
+                            <element name="forward" value="read1.fq"/>
+                            <element name="reverse" value="read2.fq"/>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="hic"/>
+                <param name="split_alignment" value="true"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--TagAlign"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test03_mapping.tsv" ftype="tabular"/>
+        </test>
+
+    <!-- Test 4: paired-end collection, hic preset, 4DN pairs output, max insert size 2000, MAPQ threshold 0, no summary -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="paired"/>
+                    <param name="paired_collection">
+                        <collection type="paired">
+                            <element name="forward" value="read1.fq"/>
+                            <element name="reverse" value="read2.fq"/>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="hic"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="2000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--pairs"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test04_mapping.pairs" ftype="4dn_pairs"/>
+        </test>
+
+    <!-- Test 5: single-end, atac preset, barcode FASTQ plus whitelist and read format string, BED output with summary (two outputs) -->
+        <test expect_num_outputs="2">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="single"/>
+                    <param name="single_read" value="read1_se.fq"/>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+                <param name="barcode" value="barcode.fq"/>
+                <param name="barcode_whitelist" value="whitelist.txt"/>
+                <param name="read_format" value="r1:0:-1,bc:0:-1"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="atac"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+                <param name="bc_error_threshold" value="1"/>
+                <param name="bc_probability_threshold" value="0.9"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--BED"/>
+                <param name="summary" value="true"/>
+            </section>
+            <output name="mapping_out" file="test05_mapping.bed" ftype="bed"/>
+            <output name="summary_out" file="test05_summary.txt" ftype="txt"/>
+        </test>
+
+    <!-- Test 6: single-end, atac preset, custom k-mer (15) and window (5), MAPQ threshold 0, BED output, no summary -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="single"/>
+                    <param name="single_read" value="read1_se.fq"/>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="15"/>
+                <param name="window" value="5"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="atac"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--BED"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test06_mapping.bed" ftype="bed"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**chromap** is a fast aligner and preprocessor for chromatin profiling data (ATAC-seq, ChIP-seq, Hi-C and their single-cell variants).
+
+-----
+
+**Inputs**
+
+*Reads* : Provide either single-end FASTQ files or a paired-end collection. Multiple single-end files can be selected and will be processed together.  
+
+*Reference* : A reference genome in FASTA format. The index is built automatically — no separate indexing step is needed.  
+
+*Barcode file* (optional) : For single-cell experiments, provide a FASTQ file containing cell barcode sequences. Use the **Read/barcode format string** to describe how reads and barcodes are distributed across files. The default ``r1:0:-1,bc:0:-1`` corresponds to 10x Genomics single-end layout.  
+
+*Barcode whitelist* (optional) : A plain-text file of known valid barcodes (one per line). Barcodes not in the list will be corrected if within the Hamming distance set by **Barcode error threshold**. Without a whitelist, all barcodes are passed through uncorrected.  
+
+-----
+
+**Preset**
+
+Presets load recommended parameter bundles for each assay type. They are applied first; any parameter you set explicitly will override the preset value.
+
+- *atac* - ATAC-seq / scATAC-seq
+- *chip* - ChIP-seq
+- *hic* - Hi-C
+
+-----
+
+**Indexing options**
+
+These control the minimiser index built from the reference before mapping.
+
+- **K-mer length** (default 17) and **Window size** (default 7) together determine index density and sensitivity. Shorter k-mers or smaller windows increase sensitivity at the cost of speed and memory.
+- **Min fragment length** : if set, chromap automatically chooses k and w to suit the expected fragment size, ignoring the manual values above.
+
+-----
+
+**Key mapping parameters**
+
+- **Tn5 shift** : shifts read 5′ ends by +4 bp (forward) or −5 bp (reverse) to centre on the Tn5 insertion site. Enable this for ATAC-seq when calling peaks with MACS2 or similar tools.
+- **Trim adapters** : detects and removes 3′ adapter sequence before alignment. Useful when reads extend beyond short inserts.
+- **Split alignments** : allows a read to align as two separate segments. Required for Hi-C reads spanning a ligation junction.
+- **Min MAPQ** (default 30) : alignments below this mapping quality are excluded from the output. Set to 0 to retain all alignments.
+- **Max insert size** (default 1000) : paired-end only. Read pairs with an inferred insert size above this value are not reported.
+- **Max errors** (default 8) : maximum mismatches/indels allowed in a reported alignment.
+- **Max seed frequencies** (default ``500,1000``) : seeds found more often than these thresholds are skipped as repetitive. Reducing these values speeds up mapping in repetitive genomes at the cost of sensitivity.
+
+-----
+
+
+**Output formats**
+
+The mapping output is written in the format chosen under **Output format**: SAM, BED/BEDPE, TagAlign/PairedTagAlign, or 4DN pairs.
+
+*Summary file* : when enabled, produces a CSV with per-barcode (or bulk) alignment statistics including total reads, duplicates, unmapped, low-MAPQ counts, and an estimated FRiP score.
+
+
+-----
+
+**Tips**
+
+- For bulk ATAC-seq peak calling, use the ``atac`` preset with **Tn5 shift** enabled and **BED** output.
+- For scATAC-seq, add a barcode file and whitelist; the summary CSV will contain one row per cell barcode.
+- For Hi-C contact matrix generation, use the ``hic`` preset with **4DN pairs** output and enable **Split alignments**.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+    <expand macro="creator"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,21 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.3.2</token> <!-- chromap package version; also pins the requirement below -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended after "+galaxy" in the tool version -->
+    <token name="@PROFILE@">25.0</token> <!-- value of the tool's profile attribute -->
+    <xml name="requirements"> <!-- expanded by the tool via expand macro="requirements" -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">chromap</requirement>
+        </requirements>
+    </xml>
+    <xml name="creator"> <!-- wrapper author metadata -->
+        <creator>
+            <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12"/>
+            <organization name="Galaxy Europe" url="https://galaxyproject.org/eu/"/>
+        </creator>
+    </xml>
+    <xml name="citations"> <!-- DOI citation shown with the tool -->
+        <citations>
+            <citation type="doi">10.1038/s41467-021-26865-w</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/barcode.fq	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,32 @@
+@read_1
+TAAACTTGGCTTGACT
++
+IIIIIIIIIIIIIIII
+@read_2
+TGAGCTCGCTTTAACG
++
+IIIIIIIIIIIIIIII
+@read_3
+TCGCTACACGCCGAAT
++
+IIIIIIIIIIIIIIII
+@read_4
+CATATATGACAATTCT
++
+IIIIIIIIIIIIIIII
+@read_5
+GCACGGGTAAAAAAGC
++
+IIIIIIIIIIIIIIII
+@read_6
+CCACAATGAATCTTTA
++
+IIIIIIIIIIIIIIII
+@read_7
+CCGTTTGGTCGACGAT
++
+IIIIIIIIIIIIIIII
+@read_8
+CATCTGCCCGCTCTGG
++
+IIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read1.fq	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,32 @@
+@read_1/1
+TAACGGAGATCGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_2/1
+CGTTAATTAAAGACCAAGAACTTGCAATTTGGCATTCAATTAACTCTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_3/1
+TGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_4/1
+CTCACACAATGTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_5/1
+CGTAAGGTGAAAATTAAGGATTTATCTGCGTATGCCTGTGAATATGTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_6/1
+GGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_7/1
+AACTCTAGAATTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_8/1
+TCTTCGTTATGGGATCTGACGAATTACCTACTGTACCATTCTCCAGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read1_se.fq	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,32 @@
+@read_1
+TAACGGAGATCGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_2
+CGTTAATTAAAGACCAAGAACTTGCAATTTGGCATTCAATTAACTCTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_3
+TGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_4
+CTCACACAATGTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_5
+CGTAAGGTGAAAATTAAGGATTTATCTGCGTATGCCTGTGAATATGTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_6
+GGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_7
+AACTCTAGAATTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_8
+TCTTCGTTATGGGATCTGACGAATTACCTACTGTACCATTCTCCAGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read2.fq	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,32 @@
+@read_1/2
+GGTAGAGTTAATTGAATGCCAAATTGCAAGTTCTTGGTCTTTAATTAACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_2/2
+GTTAACTCCTTATGGATCTCCTATTTGCTGTCGCCGTAGGGTTTGGATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_3/2
+ACAATTAACGCGATTAAAACGAACTACCCAGTCTAAACACATTGTGTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_4/2
+TATACATATTCACAGGCATACGCAGATAAATCCTTAATTTTCACCTTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_5/2
+GTTCGACTTGTACATGTGGTCAAAGTGCACGCGTGGAAAAACGTCGCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_6/2
+AAGCCCGCACCTTAATTTTCTTGCCCAGGAAATCCCTGAATTCTAGAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_7/2
+GAGACTGGAGAATGGTACAGTAGGTAATTCGTCAGATCCCATAACGAAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@read_8/2
+AATAATGTACAAATCGGTTTATGTCGTTCAATATCTGCAGCTACGGCTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fa	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,18 @@
+>chr1
+GAAAGGCATAATAAGTATCACGTACTAACGCGTCTTCGCTGAAAAAAAGTTAACGGAGAT
+CGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCCAGGTAGATTCCATACCAGGA
+GTGTATACCCTACCATAGGATTACTATGATCGTTAATTAAAGACCAAGAACTTGCAATTT
+GGCATTCAATTAACTCTACCCCCATATATCAGTTCCTGAACTTGATTCACAATGAACATG
+TTTCAGATGATGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC
+CTCTAATCGCCAAAGCTGACCCCCAGTCCCCAGACCACTTGAAATCCAGTCTCACACAAT
+GTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGTTATCGAATGTCGGAAAATCA
+TGAGTAGAGGATACTAACTCGCTCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCTGCG
+TATGCCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGA
+CCAATGCTGAGGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC
+AGTGGGTGAAGTTTTTGTGAAAAATGAATGCTAAAAAATACTGACTCTTTAACTCTAGAA
+TTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTTGCCAATGTAAGGCTTAATTA
+ACCTCCGAAGTGCATGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTA
+CTGTACCATTCTCCAGTCTCAATCTATTTTTTAGTAGAGGCTGCCTATTCCTTTGTGATC
+TGGCCCTTGGCAAGCCGTAGCTGCAGATATTGAACGACATAAACCGATTTGTACATTATT
+CACGATGGAGTCAGGTGGGGGCGCGTTTGAAGAATCTCCACTCGTACACCGCCCTGATTG
+GTCCAAACTCAACCTTACTTACATGGCTGATATTCATTCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test01_mapping.bed	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,8 @@
+chr1	50	200	N	60	+	1
+chr1	150	300	N	60	+	1
+chr1	250	400	N	60	+	1
+chr1	350	500	N	60	+	1
+chr1	450	600	N	60	+	1
+chr1	550	700	N	60	+	1
+chr1	650	800	N	60	+	1
+chr1	750	900	N	60	+	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test01_summary.txt	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,2 @@
+barcode,total,duplicate,unmapped,lowmapq,cachehit,fric,estfrip,numcacheslots
+,8,0,0,0,0,0.00000,0.00000,0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test02_mapping.sam	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,9 @@
+@SQ	SN:chr1	LN:1000
+read_1	0	chr1	51	60	50M	*	0	0	TAACGGAGATCGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCC	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_2	0	chr1	151	60	50M	*	0	0	CGTTAATTAAAGACCAAGAACTTGCAATTTGGCATTCAATTAACTCTACC	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_3	0	chr1	251	60	50M	*	0	0	TGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_4	0	chr1	351	60	50M	*	0	0	CTCACACAATGTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_5	0	chr1	451	60	50M	*	0	0	CGTAAGGTGAAAATTAAGGATTTATCTGCGTATGCCTGTGAATATGTATA	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_6	0	chr1	551	60	50M	*	0	0	GGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_7	0	chr1	651	60	50M	*	0	0	AACTCTAGAATTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
+read_8	0	chr1	751	60	50M	*	0	0	TCTTCGTTATGGGATCTGACGAATTACCTACTGTACCATTCTCCAGTCTC	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:50
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test03_mapping.tsv	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,16 @@
+chr1	50	100	N	39	+
+chr1	150	200	N	39	-	1
+chr1	150	200	N	39	+
+chr1	250	300	N	39	-	1
+chr1	250	300	N	39	+
+chr1	350	400	N	39	-	1
+chr1	350	400	N	39	+
+chr1	450	500	N	39	-	1
+chr1	450	500	N	39	+
+chr1	550	600	N	39	-	1
+chr1	550	600	N	39	+
+chr1	650	700	N	39	-	1
+chr1	650	700	N	39	+
+chr1	750	800	N	39	-	1
+chr1	750	800	N	39	+
+chr1	850	900	N	39	-	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test04_mapping.pairs	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,12 @@
+## pairs format v1.0.0
+#shape: upper triangle
+#chromsize: chr1 1000
+#columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type
+read_1/1	chr1	51	chr1	200	+	-	UU
+read_2/1	chr1	151	chr1	300	+	-	UU
+read_3/1	chr1	251	chr1	400	+	-	UU
+read_4/1	chr1	351	chr1	500	+	-	UU
+read_5/1	chr1	451	chr1	600	+	-	UU
+read_6/1	chr1	551	chr1	700	+	-	UU
+read_7/1	chr1	651	chr1	800	+	-	UU
+read_8/1	chr1	751	chr1	900	+	-	UU
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test05_mapping.bed	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,8 @@
+chr1	54	104	TAAACTTGGCTTGACT	1
+chr1	154	204	TGAGCTCGCTTTAACG	1
+chr1	254	304	TCGCTACACGCCGAAT	1
+chr1	354	404	CATATATGACAATTCT	1
+chr1	454	504	GCACGGGTAAAAAAGC	1
+chr1	554	604	CCACAATGAATCTTTA	1
+chr1	654	704	CCGTTTGGTCGACGAT	1
+chr1	754	804	CATCTGCCCGCTCTGG	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test05_summary.txt	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,10 @@
+barcode,total,duplicate,unmapped,lowmapq,cachehit,fric,estfrip,numcacheslots
+TCGCTACACGCCGAAT,1,0,0,0,0,0.00000,0.00000,0
+CCGTTTGGTCGACGAT,1,0,0,0,0,0.00000,0.00000,0
+TGAGCTCGCTTTAACG,1,0,0,0,0,0.00000,0.00000,0
+CATATATGACAATTCT,1,0,0,0,0,0.00000,0.00000,0
+TAAACTTGGCTTGACT,1,0,0,0,0,0.00000,0.00000,0
+GCACGGGTAAAAAAGC,1,0,0,0,0,0.00000,0.00000,0
+CATCTGCCCGCTCTGG,1,0,0,0,0,0.00000,0.00000,0
+CCACAATGAATCTTTA,1,0,0,0,0,0.00000,0.00000,0
+non-whitelist,0,0,0,0,0,0.00000,0.00000,0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test06_mapping.bed	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,8 @@
+chr1	54	104	N	60	+	1
+chr1	154	204	N	60	+	1
+chr1	254	304	N	60	+	1
+chr1	354	404	N	60	+	1
+chr1	454	504	N	60	+	1
+chr1	554	604	N	60	+	1
+chr1	654	704	N	60	+	1
+chr1	754	804	N	60	+	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/whitelist.txt	Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,8 @@
+TAAACTTGGCTTGACT
+TGAGCTCGCTTTAACG
+TCGCTACACGCCGAAT
+CATATATGACAATTCT
+GCACGGGTAAAAAAGC
+CCACAATGAATCTTTA
+CCGTTTGGTCGACGAT
+CATCTGCCCGCTCTGG