Mercurial > repos > iuc > chromap
changeset 0:61fa9655ab32 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/chromap commit 392fc1bebfff21996c13ba0edb952b5f3784cca2
| author | iuc |
|---|---|
| date | Tue, 17 Feb 2026 19:09:08 +0000 |
| parents | |
| children | |
| files | chromap.xml macros.xml test-data/barcode.fq test-data/read1.fq test-data/read1_se.fq test-data/read2.fq test-data/ref.fa test-data/test01_mapping.bed test-data/test01_summary.txt test-data/test02_mapping.sam test-data/test03_mapping.tsv test-data/test04_mapping.pairs test-data/test05_mapping.bed test-data/test05_summary.txt test-data/test06_mapping.bed test-data/whitelist.txt |
| diffstat | 16 files changed, 689 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/chromap.xml Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,441 @@
+<tool id="chromap" name="chromap" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Fast alignment and preprocessing of chromatin profiles</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Step 1: Build index from reference FASTA (optional numerics are tested with str() so an explicit 0 is still forwarded)
+        chromap
+            -i
+            -r '$input_options.ref'
+            -o chromap_index
+            -k $index_options.kmer
+            -w $index_options.window
+            #if str($index_options.min_frag_length) != ''
+                --min-frag-length $index_options.min_frag_length
+            #end if
+        &&
+
+        ## Step 2: Map reads using built index
+        chromap
+            --preset $mapping_options.preset
+            #if $input_options.read_type.input_reads_type == 'single'
+                ## chromap takes several read files as ONE comma-separated -1 value; a multi-select data param already renders as comma-joined paths
+                -1 '$input_options.read_type.single_read'
+            #else
+                -1 '$input_options.read_type.paired_collection.forward'
+                -2 '$input_options.read_type.paired_collection.reverse'
+            #end if
+
+            ## --- Reference and index ---
+            -r '$input_options.ref'
+            -x chromap_index
+
+            ## --- Optional barcode inputs ---
+            #if $input_options.barcode
+                -b '$input_options.barcode'
+            #end if
+            #if $input_options.barcode_whitelist
+                --barcode-whitelist '$input_options.barcode_whitelist'
+            #end if
+            #if $input_options.read_format
+                --read-format '$input_options.read_format'
+            #end if
+            #if $input_options.barcode_translate
+                --barcode-translate '$input_options.barcode_translate'
+            #end if
+
+            ## --- Mapping options (a bare "#if $param" is false for 0 / 0.0, so optional numerics compare str() against '') ---
+            $mapping_options.split_alignment
+            --error-threshold $mapping_options.error_threshold
+            --min-num-seeds $mapping_options.min_num_seeds
+            #if $mapping_options.max_seed_frequencies
+                --max-seed-frequencies '$mapping_options.max_seed_frequencies'
+            #end if
+            --max-insert-size $mapping_options.max_insert_size
+            --MAPQ-threshold $mapping_options.MAPQ_threshold
+            --min-read-length $mapping_options.min_read_length
+            $mapping_options.trim_adapters
+            $mapping_options.Tn5_shift
+            #if str($mapping_options.bc_error_threshold) != ''
+                --bc-error-threshold $mapping_options.bc_error_threshold
+            #end if
+            #if str($mapping_options.bc_probability_threshold) != ''
+                --bc-probability-threshold $mapping_options.bc_probability_threshold
+            #end if
+            #if $mapping_options.chr_order
+                --chr-order '$mapping_options.chr_order'
+            #end if
+            #if $mapping_options.pairs_natural_chr_order
+                --pairs-natural-chr-order '$mapping_options.pairs_natural_chr_order'
+            #end if
+
+            ## --- Output format (out_format carries the literal flag; the summary path is only passed when the summary output is enabled) ---
+            $output_options.out_format
+            #if $output_options.summary
+                --summary '$summary_out'
+            #end if
+            -t "\${GALAXY_SLOTS:-8}"
+            -o '$mapping_out'
+
+    ]]></command>
+    <inputs>
+        <!-- Input Options -->
+        <section name="input_options" title="Input options" expanded="true">
+            <conditional name="read_type">
+                <param name="input_reads_type" type="select" label="Select the Input read type">
+                    <option value="single" selected="true">Single-end</option>
+                    <option value="paired">Paired-end collection</option>
+                </param>
+                <when value="single">
+                    <param name="single_read" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" multiple="true" label="Single Read"/>
+                </when>
+                <when value="paired">
+                    <param name="paired_collection" type="data_collection" collection_type="paired" label="Paired reads collection" help="Select a paired collection containing forward and reverse reads."/>
+                </when>
+            </conditional>
+            <param argument="--ref" type="data" format="fasta" label="Reference (FASTA)"/>
+            <param argument="--barcode" type="data" format="fastq,fastq.gz" label="Barcode file" optional="true"/>
+            <param argument="--barcode-whitelist" type="data" format="txt" label="Barcode whitelist file" optional="true"/>
+            <param argument="--read-format" type="text" optional="true" label="Read/barcode format string" help='Example: "r1:0:-1,bc:0:-1" (10x single-end)'/>
+            <param argument="--barcode-translate" type="data" format="tabular" label="Barcode translate file" optional="true"/>
+        </section>
+
+        <!-- Indexing Options -->
+        <section name="index_options" title="Indexing options" expanded="false">
+            <param argument="--min-frag-length" type="integer" optional="true" value="30" label="Min fragment length for choosing kmer length and window automatically" help="chromap --min-frag-length (default 30)"/>
+            <param argument="--kmer" type="integer" value="17" label="K-mer length"/>
+            <param argument="--window" type="integer" value="7" label="Window size"/>
+        </section>
+
+        <!-- Mapping Options -->
+        <section name="mapping_options" title="Mapping" expanded="false">
+            <param argument="--preset" type="select" label="Preset" help="Preset parameters for mapping reads">
+                <option value="atac">atac (ATAC-seq/scATAC-seq)</option>
+                <option value="chip">chip (ChIP-seq)</option>
+                <option value="hic">hic (Hi-C)</option>
+            </param>
+            <param argument="--split-alignment" type="boolean" label="Allow split alignments" truevalue="--split-alignment" falsevalue="" checked="false"/>
+            <param argument="--error-threshold" type="integer" value="8" label="Max errors allowed"/>
+            <param argument="--min-num-seeds" type="integer" value="2" label="Min number of seeds"/>
+            <param argument="--max-seed-frequencies" type="text" optional="true" value="500,1000" label="Max seed frequencies" help="Comma-separated(default 500,1000)"/>
+            <param argument="--max-insert-size" type="integer" value="1000" label="Max insert size (only for paired-end read mapping)"/>
+            <param argument="--MAPQ-threshold" type="integer" value="30" min="0" max="60" label="Min MAPQ (-q)"/>
+            <param argument="--min-read-length" type="integer" value="30" label="Minimum read length"/>
+            <param argument="--trim-adapters" type="boolean" label="Trim adapters on 3' (--trim-adapters)" truevalue="--trim-adapters" falsevalue="" checked="false"/>
+            <param argument="--Tn5-shift" type="boolean" label="Perform Tn5 shift" truevalue="--Tn5-shift" falsevalue="" checked="false"/>
+            <param argument="--bc-error-threshold" type="integer" optional="true" value="1" label="Barcode error threshold"/>
+            <param argument="--bc-probability-threshold" type="float" optional="true" value="0.9" label="Barcode probability threshold"/>
+            <param argument="--chr-order" type="data" format="tabular" label="Custom chromosome order" optional="true"/>
+            <param argument="--pairs-natural-chr-order" type="data" format="tabular" label="Chrom order for pairs flipping" optional="true"/>
+
+        </section>
+
+        <!-- Output Options -->
+        <section name="output_options" title="Output" expanded="true">
+            <param name="out_format" type="select" label="Output format">
+                <option value="--SAM">SAM</option>
+                <option value="--BED" selected="true">BED/BEDPE</option>
+                <option value="--TagAlign">TagAlign/PairedTagAlign</option>
+                <option value="--pairs">4dn pairs</option>
+            </param>
+            <param name="summary" type="boolean" label="Produce summary file" truevalue="--summary" falsevalue="" checked="true"/>
+        </section>
+    </inputs>
+
+    <outputs>
+        <!-- Mapping primary output; actual datatype depends on out_format -->
+        <data name="mapping_out" format="bed" label="${tool.name} on ${on_string}: Mapping output">
+            <change_format>
+                <when input="output_options.out_format" value="--SAM" format="sam"/>
+                <when input="output_options.out_format" value="--BED" format="bed"/>
+                <when input="output_options.out_format" value="--TagAlign" format="tabular"/>
+                <when input="output_options.out_format" value="--pairs" format="4dn_pairs"/>
+            </change_format>
+        </data>
+        <data name="summary_out" format="txt" label="${tool.name} on ${on_string}: Summary">
+            <filter>output_options['summary']</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Test 1: Paired-end ChIP-seq, BED output, with summary. -->
+        <test expect_num_outputs="2">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="paired"/>
+                    <param name="paired_collection">
+                        <collection type="paired">
+                            <element name="forward" value="read1.fq"/>
+                            <element name="reverse" value="read2.fq"/>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="chip"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="30"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--BED"/>
+                <param name="summary" value="true"/>
+            </section>
+            <output name="mapping_out" file="test01_mapping.bed" ftype="bed"/>
+            <output name="summary_out" file="test01_summary.txt" ftype="txt"/>
+        </test>
+
+        <!-- Test 2: Single-end ATAC-seq, SAM output, Tn5 shift and adapter trimming enabled, no summary. -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="single"/>
+                    <param name="single_read" value="read1_se.fq"/>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="atac"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="true"/>
+                <param name="Tn5_shift" value="true"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--SAM"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test02_mapping.sam" ftype="sam"/>
+        </test>
+
+        <!-- Test 3: Paired-end Hi-C, TagAlign output, split alignments on -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="paired"/>
+                    <param name="paired_collection">
+                        <collection type="paired">
+                            <element name="forward" value="read1.fq"/>
+                            <element name="reverse" value="read2.fq"/>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="hic"/>
+                <param name="split_alignment" value="true"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--TagAlign"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test03_mapping.tsv" ftype="tabular"/>
+        </test>
+
+        <!-- Test 4: Paired-end Hi-C, 4DN pairs output, preset hic, pairs format, summary off -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="paired"/>
+                    <param name="paired_collection">
+                        <collection type="paired">
+                            <element name="forward" value="read1.fq"/>
+                            <element name="reverse" value="read2.fq"/>
+                        </collection>
+                    </param>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="hic"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="2000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--pairs"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test04_mapping.pairs" ftype="4dn_pairs"/>
+        </test>
+
+        <!-- Test 5: Single-end scATAC with barcode file and whitelist -->
+        <test expect_num_outputs="2">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="single"/>
+                    <param name="single_read" value="read1_se.fq"/>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+                <param name="barcode" value="barcode.fq"/>
+                <param name="barcode_whitelist" value="whitelist.txt"/>
+                <param name="read_format" value="r1:0:-1,bc:0:-1"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="17"/>
+                <param name="window" value="7"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="atac"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+                <param name="bc_error_threshold" value="1"/>
+                <param name="bc_probability_threshold" value="0.9"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--BED"/>
+                <param name="summary" value="true"/>
+            </section>
+            <output name="mapping_out" file="test05_mapping.bed" ftype="bed"/>
+            <output name="summary_out" file="test05_summary.txt" ftype="txt"/>
+        </test>
+
+        <!-- Test 6: Single-end ATAC, relaxed MAPQ (threshold=0), custom kmer/window, no summary -->
+        <test expect_num_outputs="1">
+            <section name="input_options">
+                <conditional name="read_type">
+                    <param name="input_reads_type" value="single"/>
+                    <param name="single_read" value="read1_se.fq"/>
+                </conditional>
+                <param name="ref" value="ref.fa" ftype="fasta"/>
+            </section>
+            <section name="index_options">
+                <param name="kmer" value="15"/>
+                <param name="window" value="5"/>
+            </section>
+            <section name="mapping_options">
+                <param name="preset" value="atac"/>
+                <param name="split_alignment" value="false"/>
+                <param name="error_threshold" value="8"/>
+                <param name="min_num_seeds" value="2"/>
+                <param name="max_insert_size" value="1000"/>
+                <param name="MAPQ_threshold" value="0"/>
+                <param name="min_read_length" value="30"/>
+                <param name="trim_adapters" value="false"/>
+                <param name="Tn5_shift" value="false"/>
+            </section>
+            <section name="output_options">
+                <param name="out_format" value="--BED"/>
+                <param name="summary" value="false"/>
+            </section>
+            <output name="mapping_out" file="test06_mapping.bed" ftype="bed"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**chromap** is a fast aligner and preprocessor for chromatin profiling data (ATAC-seq, ChIP-seq, Hi-C and their single-cell variants).
+
+-----
+
+**Inputs**
+
+*Reads* : Provide either single-end FASTQ files or a paired-end collection. Multiple single-end files can be selected and will be processed together.
+
+*Reference* : A reference genome in FASTA format. The index is built automatically — no separate indexing step is needed.
+
+*Barcode file* (optional) : For single-cell experiments, provide a FASTQ file containing cell barcode sequences. Use the **Read/barcode format string** to describe how reads and barcodes are distributed across files. The default ``r1:0:-1,bc:0:-1`` corresponds to 10x Genomics single-end layout.
+
+*Barcode whitelist* (optional) : A plain-text file of known valid barcodes (one per line). Barcodes not in the list will be corrected if within the Hamming distance set by **Barcode error threshold**. Without a whitelist, all barcodes are passed through uncorrected.
+
+-----
+
+**Preset**
+
+Presets load recommended parameter bundles for each assay type. They are applied first; any parameter you set explicitly will override the preset value.
+
+- *atac* - ATAC-seq / scATAC-seq
+- *chip* - ChIP-seq
+- *hic* - Hi-C
+
+-----
+
+**Indexing options**
+
+These control the minimiser index built from the reference before mapping.
+
+- **K-mer length** (default 17) and **Window size** (default 7) together determine index density and sensitivity. Shorter k-mers or smaller windows increase sensitivity at the cost of speed and memory.
+- **Min fragment length** : if set, chromap automatically chooses k and w to suit the expected fragment size, ignoring the manual values above.
+
+-----
+
+**Key mapping parameters**
+
+- **Tn5 shift** : shifts read 5′ ends by +4 bp (forward) or −5 bp (reverse) to centre on the Tn5 insertion site. Enable this for ATAC-seq when calling peaks with MACS2 or similar tools.
+- **Trim adapters** : detects and removes 3′ adapter sequence before alignment. Useful when reads extend beyond short inserts.
+- **Split alignments** : allows a read to align as two separate segments. Required for Hi-C reads spanning a ligation junction.
+- **Min MAPQ** (default 30) : alignments below this mapping quality are excluded from the output. Set to 0 to retain all alignments.
+- **Max insert size** (default 1000) : paired-end only. Read pairs with an inferred insert size above this value are not reported.
+- **Max errors** (default 8) : maximum mismatches/indels allowed in a reported alignment.
+- **Max seed frequencies** (default ``500,1000``) : seeds found more often than these thresholds are skipped as repetitive. Reducing these values speeds up mapping in repetitive genomes at the cost of sensitivity.
+
+-----
+
+
+**Output formats**
+
+Based on the selected options, output can be: SAM, BED / BEDPE, TagAlign, or 4DN pairs format.
+
+*Summary file* : when enabled, produces a CSV with per-barcode (or bulk) alignment statistics including total reads, duplicates, unmapped, low-MAPQ counts, and an estimated FRiP score.
+
+
+-----
+
+**Tips**
+
+- For bulk ATAC-seq peak calling, use the ``atac`` preset with **Tn5 shift** enabled and **BED** output.
+- For scATAC-seq, add a barcode file and whitelist; the summary CSV will contain one row per cell barcode.
+- For Hi-C contact matrix generation, use the ``hic`` preset with **4DN pairs** output and enable **Split alignments**.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+    <expand macro="creator"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Feb 17 19:09:08 2026 +0000
@@ -0,0 +1,21 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.3.2</token> <!-- chromap package version; also the first half of the tool version string in chromap.xml -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision appended after "+galaxy" in the tool version string -->
+    <token name="@PROFILE@">25.0</token> <!-- value of the tool's profile attribute -->
+    <xml name="requirements"> <!-- dependency block, pinned to @TOOL_VERSION@ -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">chromap</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations"> <!-- publication cited by the tool -->
+        <citations>
+            <citation type="doi">10.1038/s41467-021-26865-w</citation>
+        </citations>
+    </xml>
+    <xml name="creator"> <!-- wrapper author and organisation metadata -->
+        <creator>
+            <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12"/>
+            <organization name="Galaxy Europe" url="https://galaxyproject.org/eu/"/>
+        </creator>
+    </xml>
+</macros>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/barcode.fq Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,32 @@ +@read_1 +TAAACTTGGCTTGACT ++ +IIIIIIIIIIIIIIII +@read_2 +TGAGCTCGCTTTAACG ++ +IIIIIIIIIIIIIIII +@read_3 +TCGCTACACGCCGAAT ++ +IIIIIIIIIIIIIIII +@read_4 +CATATATGACAATTCT ++ +IIIIIIIIIIIIIIII +@read_5 +GCACGGGTAAAAAAGC ++ +IIIIIIIIIIIIIIII +@read_6 +CCACAATGAATCTTTA ++ +IIIIIIIIIIIIIIII +@read_7 +CCGTTTGGTCGACGAT ++ +IIIIIIIIIIIIIIII +@read_8 +CATCTGCCCGCTCTGG ++ +IIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/read1.fq Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,32 @@ +@read_1/1 +TAACGGAGATCGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_2/1 +CGTTAATTAAAGACCAAGAACTTGCAATTTGGCATTCAATTAACTCTACC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_3/1 +TGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_4/1 +CTCACACAATGTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_5/1 +CGTAAGGTGAAAATTAAGGATTTATCTGCGTATGCCTGTGAATATGTATA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_6/1 +GGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_7/1 +AACTCTAGAATTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_8/1 +TCTTCGTTATGGGATCTGACGAATTACCTACTGTACCATTCTCCAGTCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/read1_se.fq Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,32 @@ +@read_1 +TAACGGAGATCGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_2 +CGTTAATTAAAGACCAAGAACTTGCAATTTGGCATTCAATTAACTCTACC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_3 +TGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_4 +CTCACACAATGTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_5 +CGTAAGGTGAAAATTAAGGATTTATCTGCGTATGCCTGTGAATATGTATA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_6 +GGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_7 +AACTCTAGAATTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_8 +TCTTCGTTATGGGATCTGACGAATTACCTACTGTACCATTCTCCAGTCTC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/read2.fq Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,32 @@ +@read_1/2 +GGTAGAGTTAATTGAATGCCAAATTGCAAGTTCTTGGTCTTTAATTAACG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_2/2 +GTTAACTCCTTATGGATCTCCTATTTGCTGTCGCCGTAGGGTTTGGATCA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_3/2 +ACAATTAACGCGATTAAAACGAACTACCCAGTCTAAACACATTGTGTGAG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_4/2 +TATACATATTCACAGGCATACGCAGATAAATCCTTAATTTTCACCTTACG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_5/2 +GTTCGACTTGTACATGTGGTCAAAGTGCACGCGTGGAAAAACGTCGCTCC ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_6/2 +AAGCCCGCACCTTAATTTTCTTGCCCAGGAAATCCCTGAATTCTAGAGTT ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_7/2 +GAGACTGGAGAATGGTACAGTAGGTAATTCGTCAGATCCCATAACGAAGA ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +@read_8/2 +AATAATGTACAAATCGGTTTATGTCGTTCAATATCTGCAGCTACGGCTTG ++ +IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ref.fa Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,18 @@ +>chr1 +GAAAGGCATAATAAGTATCACGTACTAACGCGTCTTCGCTGAAAAAAAGTTAACGGAGAT +CGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCCAGGTAGATTCCATACCAGGA +GTGTATACCCTACCATAGGATTACTATGATCGTTAATTAAAGACCAAGAACTTGCAATTT +GGCATTCAATTAACTCTACCCCCATATATCAGTTCCTGAACTTGATTCACAATGAACATG +TTTCAGATGATGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC +CTCTAATCGCCAAAGCTGACCCCCAGTCCCCAGACCACTTGAAATCCAGTCTCACACAAT +GTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGTTATCGAATGTCGGAAAATCA +TGAGTAGAGGATACTAACTCGCTCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCTGCG +TATGCCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGA +CCAATGCTGAGGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC +AGTGGGTGAAGTTTTTGTGAAAAATGAATGCTAAAAAATACTGACTCTTTAACTCTAGAA +TTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTTGCCAATGTAAGGCTTAATTA +ACCTCCGAAGTGCATGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTA +CTGTACCATTCTCCAGTCTCAATCTATTTTTTAGTAGAGGCTGCCTATTCCTTTGTGATC +TGGCCCTTGGCAAGCCGTAGCTGCAGATATTGAACGACATAAACCGATTTGTACATTATT +CACGATGGAGTCAGGTGGGGGCGCGTTTGAAGAATCTCCACTCGTACACCGCCCTGATTG +GTCCAAACTCAACCTTACTTACATGGCTGATATTCATTCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test01_mapping.bed Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,8 @@ +chr1 50 200 N 60 + 1 +chr1 150 300 N 60 + 1 +chr1 250 400 N 60 + 1 +chr1 350 500 N 60 + 1 +chr1 450 600 N 60 + 1 +chr1 550 700 N 60 + 1 +chr1 650 800 N 60 + 1 +chr1 750 900 N 60 + 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test01_summary.txt Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,2 @@ +barcode,total,duplicate,unmapped,lowmapq,cachehit,fric,estfrip,numcacheslots +,8,0,0,0,0,0.00000,0.00000,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test02_mapping.sam Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,9 @@ +@SQ SN:chr1 LN:1000 +read_1 0 chr1 51 60 50M * 0 0 TAACGGAGATCGTGCGAATAACCTGTCTAATATCTACTAAAGGTATCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_2 0 chr1 151 60 50M * 0 0 CGTTAATTAAAGACCAAGAACTTGCAATTTGGCATTCAATTAACTCTACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_3 0 chr1 251 60 50M * 0 0 TGATCCAAACCCTACGGCGACAGCAAATAGGAGATCCATAAGGAGTTAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_4 0 chr1 351 60 50M * 0 0 CTCACACAATGTGTTTAGACTGGGTAGTTCGTTTTAATCGCGTTAATTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_5 0 chr1 451 60 50M * 0 0 CGTAAGGTGAAAATTAAGGATTTATCTGCGTATGCCTGTGAATATGTATA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_6 0 chr1 551 60 50M * 0 0 GGAGCGACGTTTTTCCACGCGTGCACTTTGACCACATGTACAAGTCGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_7 0 chr1 651 60 50M * 0 0 AACTCTAGAATTCAGGGATTTCCTGGGCAAGAAAATTAAGGTGCGGGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50 +read_8 0 chr1 751 60 50M * 0 0 TCTTCGTTATGGGATCTGACGAATTACCTACTGTACCATTCTCCAGTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII NM:i:0 MD:Z:50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test03_mapping.tsv Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,16 @@ +chr1 50 100 N 39 + +chr1 150 200 N 39 - 1 +chr1 150 200 N 39 + +chr1 250 300 N 39 - 1 +chr1 250 300 N 39 + +chr1 350 400 N 39 - 1 +chr1 350 400 N 39 + +chr1 450 500 N 39 - 1 +chr1 450 500 N 39 + +chr1 550 600 N 39 - 1 +chr1 550 600 N 39 + +chr1 650 700 N 39 - 1 +chr1 650 700 N 39 + +chr1 750 800 N 39 - 1 +chr1 750 800 N 39 + +chr1 850 900 N 39 - 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test04_mapping.pairs Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,12 @@ +## pairs format v1.0.0 +#shape: upper triangle +#chromsize: chr1 1000 +#columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type +read_1/1 chr1 51 chr1 200 + - UU +read_2/1 chr1 151 chr1 300 + - UU +read_3/1 chr1 251 chr1 400 + - UU +read_4/1 chr1 351 chr1 500 + - UU +read_5/1 chr1 451 chr1 600 + - UU +read_6/1 chr1 551 chr1 700 + - UU +read_7/1 chr1 651 chr1 800 + - UU +read_8/1 chr1 751 chr1 900 + - UU
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test05_mapping.bed Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,8 @@ +chr1 54 104 TAAACTTGGCTTGACT 1 +chr1 154 204 TGAGCTCGCTTTAACG 1 +chr1 254 304 TCGCTACACGCCGAAT 1 +chr1 354 404 CATATATGACAATTCT 1 +chr1 454 504 GCACGGGTAAAAAAGC 1 +chr1 554 604 CCACAATGAATCTTTA 1 +chr1 654 704 CCGTTTGGTCGACGAT 1 +chr1 754 804 CATCTGCCCGCTCTGG 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test05_summary.txt Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,10 @@ +barcode,total,duplicate,unmapped,lowmapq,cachehit,fric,estfrip,numcacheslots +TCGCTACACGCCGAAT,1,0,0,0,0,0.00000,0.00000,0 +CCGTTTGGTCGACGAT,1,0,0,0,0,0.00000,0.00000,0 +TGAGCTCGCTTTAACG,1,0,0,0,0,0.00000,0.00000,0 +CATATATGACAATTCT,1,0,0,0,0,0.00000,0.00000,0 +TAAACTTGGCTTGACT,1,0,0,0,0,0.00000,0.00000,0 +GCACGGGTAAAAAAGC,1,0,0,0,0,0.00000,0.00000,0 +CATCTGCCCGCTCTGG,1,0,0,0,0,0.00000,0.00000,0 +CCACAATGAATCTTTA,1,0,0,0,0,0.00000,0.00000,0 +non-whitelist,0,0,0,0,0,0.00000,0.00000,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test06_mapping.bed Tue Feb 17 19:09:08 2026 +0000 @@ -0,0 +1,8 @@ +chr1 54 104 N 60 + 1 +chr1 154 204 N 60 + 1 +chr1 254 304 N 60 + 1 +chr1 354 404 N 60 + 1 +chr1 454 504 N 60 + 1 +chr1 554 604 N 60 + 1 +chr1 654 704 N 60 + 1 +chr1 754 804 N 60 + 1
