changeset 0:2c289e3b566a draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/snapatac2 commit e0f59fae19e57f54ae0c351a16dd1805d12aba1d
author iuc
date Tue, 25 Nov 2025 16:39:58 +0000
parents
children
files macros.xml network.xml test-data/all_fasta.loc test-data/chr21.gff3.gz test-data/chr21_small.fasta.gz test-data/cisBP_human.meme.gz test-data/gene_sets.loc test-data/meme.loc tool-data/all_fasta.loc.sample tool-data/gene_sets.loc.sample tool-data/meme.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 13 files changed, 731 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,256 @@
+<macros>
+    <!-- Version tokens: wrapped tool version, wrapper revision suffix and Galaxy profile, referenced by the tool's version and profile attributes -->
+    <token name="@TOOL_VERSION@">2.8.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">24.0</token>
+    <!-- bio.tools cross-reference entry (snapatac) -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">snapatac</xref>
+        </xrefs>
+    </xml>
+    <!-- Package requirements for the wrappers; each package is listed once, and callers may append more through the yield -->
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
+        <requirement type="package" version="0.8.37">hdbscan</requirement>
+        <requirement type="package" version="0.10.2">leidenalg</requirement>
+        <requirement type="package" version="0.5.7">umap-learn</requirement>
+        <requirement type="package" version="3.0.4">xgboost</requirement>
+        <requirement type="package" version="0.2.1">python-kaleido</requirement>
+        <requirement type="package" version="1.31.0">polars</requirement>
+        <requirement type="package" version="5.24.1">plotly</requirement>
+        <requirement type="package" version="0.0.10">harmonypy</requirement>
+        <requirement type="package" version="1.7.4">scanorama</requirement>
+        <yield />
+    </xml>
+
+    <!-- command section -->
+    <!-- Copies the input AnnData ($method.adata) into the working directory as anndata.h5ad -->
+    <token name="@CMD_PREP_ADATA@"><![CDATA[
+        ## ln -s does not work here
+        cp '$method.adata' 'anndata.h5ad' &&
+    ]]></token>
+    <!-- Writes the rendered script to the log output, runs it while appending its stdout to that log,
+         then prints anndata_info.txt (created empty if the script did not write it) through @CMD_PRETTIFY_STDOUT@ -->
+    <token name="@CMD@"><![CDATA[
+        cat '$script_file' > '$hidden_output' &&
+        python '$script_file' >> '$hidden_output' &&
+        touch 'anndata_info.txt' &&
+        cat 'anndata_info.txt' @CMD_PRETTIFY_STDOUT@
+    ]]></token>
+    <!-- sed pipeline applied to the printed AnnData summary: rewrites the first line to "key: value", removes single quotes,
+         turns each "key: values" line into a [key] header, and puts every comma-separated value on its own dash-prefixed line -->
+    <token name="@CMD_PRETTIFY_STDOUT@"><![CDATA[
+        | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
+    ]]></token>
+    <!-- Symlinks the selected annotation to ./gff: the data-table path when gffSource is 'cached', otherwise the history dataset -->
+    <token name="@CMD_GET_GFF@"><![CDATA[
+        #if $method.gff_file_condi.gffSource == 'cached':
+            ln -s '$method.gff_file_condi.gff_pre_installed.fields.path' gff &&
+        #else:
+            ln -s '$method.gff_file_condi.gff_history' gff &&
+        #end if
+    ]]></token>
+    <!-- Stages the selected genome as fasta.fa: gzipped inputs are decompressed, uncompressed ones are symlinked -->
+    <token name="@CMD_GET_FASTA@"><![CDATA[
+        #if $method.fasta_file_condi.fastaSource == 'indexed':
+            ## all_fasta entries may point to plain or gzipped FASTA, so only paths ending in .gz are decompressed
+            #if str($method.fasta_file_condi.fasta_pre_installed.fields.path).endswith('.gz'):
+                zcat '$method.fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
+            #else:
+                ln -s '$method.fasta_file_condi.fasta_pre_installed.fields.path' fasta.fa &&
+            #end if
+            echo "Using built-in FASTA: '$method.fasta_file_condi.fasta_pre_installed.fields.name'" >&2 &&
+        #else:
+            #if $method.fasta_file_condi.fasta_history.ext.endswith('.gz'):
+                zcat '$method.fasta_file_condi.fasta_history' > fasta.fa &&
+            #else:
+                ln -s '$method.fasta_file_condi.fasta_history' fasta.fa &&
+            #end if
+        #end if
+    ]]></token>
+
+    <!-- Config section -->
+    <!-- Python imports for the generated script: snapatac2 (as snap) and os -->
+    <token name="@CONF_IMPORTS@"><![CDATA[
+import snapatac2 as snap
+import os
+    ]]></token>
+    <!-- Loads anndata.h5ad into the variable adata via snap.read with backed = None -->
+    <token name="@CONF_READ_INPUTS@"><![CDATA[
+adata = snap.read('anndata.h5ad', backed = None)
+    ]]></token>
+    <!-- Writes adata to anndata.h5ad.gz with gzip compression and saves its printed summary to anndata_info.txt -->
+    <token name="@CONF_ANNDATA_WRITE_OUTPUTS@"><![CDATA[
+adata.write_h5ad('anndata.h5ad.gz', compression='gzip')
+with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
+    print(adata, file=ainfo)
+    ]]></token>
+    <!-- Keyword arguments for a plotting call: size from $method.width and $method.height, show and interactive disabled,
+         output written to plot.EXT where EXT is the $method.out_file value -->
+    <token name="@CONF_PARAMS_RENDER_PLOT@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    show = False,
+    interactive = False,
+    out_file = 'plot.$method.out_file',
+    ]]></token>
+    <!-- Keyword arguments built from the data integration parameters: use_dims and groupby are comma-separated text
+         converted to lists of stripped strings; use_dims, groupby and key_added are omitted when empty -->
+    <token name="@CONF_PARAMS_DATA_INTEGRATION@"><![CDATA[
+use_rep = '$method.use_rep',
+#if $method.use_dims != ''
+#set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+use_dims=$dims,
+#end if
+#if $method.groupby != ''
+#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
+groupby=$groupby,
+#end if
+#if $method.key_added != ''
+key_added = '$method.key_added',
+#end if
+    ]]></token>
+    <!-- Reads motifs from input.meme. NOTE: this token holds only the two bodies of a Cheetah conditional plus its "#else:",
+         so the caller must place it between its own "#if ...:" and "#end if".
+         First body: motif name is the id part before '+', and per name only the motif with the highest info_content() is kept.
+         Else body: motif name is the id part before '_' and family is the id part after the last '+'. -->
+    <token name="@CONF_IMPORT_MEME@"><![CDATA[
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('+')[0]
+
+unique_motifs = {}
+for motif in motifs:
+    name = motif.name
+    if (
+            name not in unique_motifs or
+            unique_motifs[name].info_content() < motif.info_content()
+        ):
+        unique_motifs[name] = motif
+motifs = list(unique_motifs.values())
+
+
+#else:
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('_')[0]
+    motif.family = motif.id.split('+')[-1]
+    ]]></token>
+
+    <!-- input section -->
+    <!-- Sanitizer for text parameters: starts from the VALIDINITIAL character set (default string.printable),
+         removes the single quote, and lets callers add further rules through the yield -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;" />
+                <yield/>
+            </valid>
+        </sanitizer>
+    </xml>
+
+    <!-- h5ad dataset input named "adata"; the label and the multiple flag are overridable tokens -->
+    <xml name="param_inputs_anndata"
+         token_multiple="false"
+         token_label="Annotated data matrix">
+        <param name="adata"
+               type="data"
+               format="h5ad"
+               multiple="@MULTIPLE@"
+               label="@LABEL@"/>
+    </xml>
+    <!-- Text parameter "groupby", sanitized with the sanitize_query macro -->
+    <xml name="param_groupby">
+        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+    </xml>
+    <!-- Collapsed "Advanced Options" section holding the show_log switch (off by default) -->
+    <xml name="param_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+        </section>
+    </xml>
+    <!-- Plot parameters: width, height and the output file type (png selected by default) -->
+    <xml name="param_render_plot">
+        <param argument="width"
+               type="integer"
+               value="600"
+               label="Width of the plot"/>
+        <param argument="height"
+               type="integer"
+               value="400"
+               label="Height of the plot"/>
+        <param name="out_file"
+               type="select"
+               optional="true"
+               label="Type of output plot">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+            <option value="html">HTML</option>
+        </param>
+    </xml>
+    <!-- Integer parameter whose argument name is given by the VARNAME token; default value and label are overridable -->
+    <xml name="param_shift"
+         tokens="varname"
+         token_value="0"
+         token_label="Insertion site correction for the left end">
+        <param argument="@VARNAME@"
+               type="integer"
+               value="@VALUE@"
+               label="@LABEL@"
+               help="Note this has no effect on single-end reads"/>
+    </xml>
+    <!-- Integer parameter chunk_size; the default value comes from the SIZE token -->
+    <xml name="param_chunk_size" tokens="size">
+        <param argument="chunk_size"
+               type="integer"
+               value="@SIZE@"
+               label="chunk size"/>
+    </xml>
+    <!-- Optional integer bounds on fragment size; both are empty by default -->
+    <xml name="param_min_max_frag_size">
+        <param argument="min_frag_size"
+               type="integer"
+               optional="true"
+               value=""
+               label="Minimum fragment size to include"/>
+        <param argument="max_frag_size"
+               type="integer"
+               optional="true"
+               value=""
+               label="Maximum fragment size to include"/>
+    </xml>
+    <!-- Text parameters for data integration: use_rep, plus the optional use_dims, groupby and key_added -->
+    <xml name="param_data_integration">
+        <param argument="use_rep"
+               type="text"
+               value="X_spectral"
+               label="The key for the matrix"/>
+        <param argument="use_dims"
+               type="text"
+               optional="true"
+               value=""
+               label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param argument="groupby"
+               type="text"
+               optional="true"
+               value=""
+               label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+        <param argument="key_added"
+               type="text"
+               optional="true"
+               value=""
+               label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
+    <!-- Integer parameter random_state (default 0); label and help text are overridable tokens -->
+    <xml name="param_random_state"
+         token_label="Seed of the random state generator"
+         token_help="">
+        <param argument="random_state"
+               type="integer"
+               value="0"
+               label="@LABEL@"
+               help="@HELP@"/>
+    </xml>
+    <!-- Text parameter key_added; its default value comes from the KEY_ADDED token -->
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>
+    </xml>
+    <!-- Text parameter use_rep (default X_spectral) with an overridable label -->
+    <xml name="param_use_rep"
+         token_label="Use the indicated representation in `.obsm`">
+        <param argument="use_rep"
+               type="text"
+               value="X_spectral"
+               label="@LABEL@"/>
+    </xml>
+    <!-- Integer parameter n_iterations (default -1) for Leiden clustering -->
+    <xml name="param_n_iterations">
+        <param argument="n_iterations"
+               type="integer"
+               value="-1"
+               label="How many iterations of the Leiden clustering algorithm to perform"
+               help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+    <!-- Select parameter counting_strategy with the options fragment, insertion and paired-insertion (selected by default) -->
+    <xml name="param_counting_strategy">
+        <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">
+            <option value="fragment">"fragment": based on the number of fragments that overlap with a region of interest</option>
+            <option value="insertion">"insertion": based on the number of insertions that overlap with a region of interest</option>
+            <option value="paired-insertion" selected="true">"paired-insertion": similar to "insertion", but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
+        </param>
+    </xml>
+    <!-- Tabular dataset input chrom_sizes -->
+    <xml name="param_chrom_sizes">
+        <param argument="chrom_sizes"
+               type="data"
+               format="tabular"
+               label="Chromosome sizes"
+               help="First column the chromosome name and second column the size"/>
+    </xml>
+    <!-- Genome FASTA selection: either an entry of the all_fasta data table ("indexed") or a fasta / fasta.gz dataset from the history -->
+    <xml name="param_genome_fasta">
+        <conditional name="fasta_file_condi">
+            <param name="fastaSource"
+                   type="select"
+                   label="Select a built-in FASTA or one from your history"
+                   help="Choose history if you don't see the correct FASTA.">
+                <option value="indexed" selected="true">Use a built-in FASTA</option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed"
+                       type="select"
+                       label="Select a FASTA file"
+                       help="Select the FASTA file from a list of pre-installed genomes">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history"
+                       type="data"
+                       format="fasta,fasta.gz"
+                       label="FASTA file" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Gene annotation selection: either an entry of the gene_sets data table ("cached") or a gff3.gz dataset from the history -->
+    <xml name="param_gene_anno">
+        <conditional name="gff_file_condi">
+            <param name="gffSource"
+                   type="select"
+                   label="Select a built-in GFF file or one from your history"
+                   help="Choose history if you don't see the correct GFF">
+                <option value="cached" selected="true">Use a built-in GFF</option>
+                <option value="history">Use a GFF from history</option>
+            </param>
+            <when value="cached">
+                <param name="gff_pre_installed"
+                       type="select"
+                       label="Select a GFF file"
+                       help="Select the GFF from a list of pre-installed files">
+                    <options from_data_table="gene_sets">
+                        <filter type="sort_by" column="1" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="gff_history"
+                       type="data"
+                       format="gff3.gz"
+                       label="Select a GFF file"
+                       help="Make sure that the GFF corresponds to the same genome as the FASTA"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Integer parameter n_comps; default value, label and help text are overridable tokens -->
+    <xml name="param_n_comps"
+         token_value="30"
+         token_label="Number of dimensions to keep"
+         token_help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30.">
+        <param argument="n_comps"
+               type="integer"
+               value="@VALUE@"
+               label="@LABEL@"
+               help="@HELP@"/>
+    </xml>
+    <!-- Select parameter "motifs" populated from the meme data table -->
+    <xml name="param_meme_table">
+        <param name="motifs"
+               type="select"
+               label="Select list of transcription factor motifs">
+            <options from_data_table="meme">
+                <filter type="sort_by" column="2" />
+            </options>
+        </param>
+    </xml>
+
+
+    <!-- test section -->
+    <!-- Test inputs overriding the plot size with 650 x 450 -->
+    <xml name="test_param_render_plot">
+        <param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+    <!-- Assertions matching the 650 x 450 plot size set by test_param_render_plot -->
+    <xml name="test_render_plot_matching_text">
+        <has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+
+
+    <!-- Citation block with a single DOI entry -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/network.xml	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,397 @@
+<tool id="snapatac2_network" name="SnapATAC2 network" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>analysis</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Stage all inputs in the working directory, then run the generated script through the CMD token
+        gunzip -c '$motifs.fields.path' > 'input.meme' &&
+        #if $gff_file_condi.gffSource == 'cached':
+            ln -s '$gff_file_condi.gff_pre_installed.fields.path' anno.gff &&
+        #else:
+            ln -s '$gff_file_condi.gff_history' anno.gff &&
+        #end if
+        ## Genome is staged as fasta.fa: gzipped inputs are decompressed, uncompressed ones are symlinked
+        ## (the history input accepts both fasta and fasta.gz, and data-table paths may be uncompressed)
+        #if $fasta_file_condi.fastaSource == 'indexed':
+            #if str($fasta_file_condi.fasta_pre_installed.fields.path).endswith('.gz'):
+                zcat '$fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
+            #else:
+                ln -s '$fasta_file_condi.fasta_pre_installed.fields.path' fasta.fa &&
+            #end if
+        #else:
+            #if $fasta_file_condi.fasta_history.ext.endswith('.gz'):
+                zcat '$fasta_file_condi.fasta_history' > fasta.fa &&
+            #else:
+                ln -s '$fasta_file_condi.fasta_history' fasta.fa &&
+            #end if
+        #end if
+        mkdir -p network_dir &&
+        cp '$adata_rna' 'anndata_rna.h5ad' &&
+        cp '$adata_atac' 'anndata_atac.h5ad' &&
+@CMD@
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+## Generated Python script: finds marker regions per group in the ATAC data and, for each group,
+## builds a network from the annotation, adds correlation, regression and TF binding information,
+## and writes one edge statistics plot into network_dir/
+@CONF_IMPORTS@
+## import motifs
+from snapatac2._snapatac2 import read_motifs, PyDNAMotif
+#if $motifs == 'cisbp':
+@CONF_IMPORT_MEME@
+#end if
+## Plot file extension follows the out_file selection; png is used when the optional select is left empty
+#set $plot_ext = str($out_file) if str($out_file) != 'None' else 'png'
+rna = snap.read('anndata_rna.h5ad', backed = None)
+atac = snap.read('anndata_atac.h5ad', backed = None)
+
+marker_peaks = snap.tl.marker_regions(
+    atac,
+    groupby = '$groupby',
+    pvalue = $pvalue_marker
+)
+
+
+for cluster_id, peaks_index in marker_peaks.items():
+    print(f"Processing cluster {cluster_id} with {len(peaks_index)} peaks...")
+
+    # Convert pandas Index to list of strings
+    regions_list = peaks_index.tolist()
+
+    # Create network for this cluster
+    network = snap.tl.init_network_from_annotation(
+        regions = regions_list,
+        anno_file = 'anno.gff',
+        upstream = $upstream,
+        downstream = $downstream,
+        id_type = '$id_type',
+        coding_gene_only = $coding_gene_only
+    )
+
+    # add cor scores
+    snap.tl.add_cor_scores(
+        network,
+        gene_mat=rna,
+        peak_mat=atac,
+        select=None, # Will get available if requested by users
+        overwrite=False
+        )
+    # add regr scores
+
+    snap.tl.add_regr_scores(
+        network,
+        gene_mat=rna,
+        peak_mat=atac,
+        select=None, # Will get available if requested by users
+        method = '$method',
+        scale_X = $scale_X,
+        scale_Y = $scale_Y,
+        alpha = $alpha,
+        l1_ratio = $l1_ratio,
+        overwrite=False
+        )
+
+    # add tf binding
+    snap.tl.add_tf_binding(
+        network,
+        motifs = motifs,
+        genome_fasta = 'fasta.fa',
+        pvalue = $pvalue_tf
+    )
+
+    ## No good documentation, will get updated in next version if a tutorial is available for it
+    ## # link TF to gene
+    ## genetic_network = snap.tl.link_tf_to_gene(
+    ##     network
+    ## )
+
+    # Create plot for this cluster
+    snap.pl.network_edge_stat(
+        network = network,
+        width = $width,
+        height = $height,
+        show = False,
+        interactive = False,
+        out_file = f'network_dir/plot_network_cluster_{cluster_id}.$plot_ext',
+    )
+
+    ## No good documentation, will get updated in next version if a tutorial is available for it
+    ## snap.pl.network_edge_stat(
+    ##     network = genetic_network,
+    ##     width = $width,
+    ##     height = $height,
+    ##     show = False,
+    ##     interactive = False,
+    ##     out_file = f'network_dir/plot_genetic_network_cluster_{cluster_id}.png',
+    ## )
+    print(f"Completed cluster {cluster_id}")
+
+print("All clusters processed!")
+    ]]></configfile>
+    </configfiles>
+    <!-- Tool inputs: RNA and ATAC AnnData, marker region settings, annotation and genome, network and regression settings,
+         motif list, TF binding threshold, plot settings and the log switch -->
+    <inputs>
+        <param name="adata_rna" type="data" format="h5ad" label="Annotated data matrix containing RNA data"/>
+        <param name="adata_atac" type="data" format="h5ad" label="Annotated data matrix containing ATAC data"/>
+        <expand macro="param_groupby"/>
+        <param argument="pvalue_marker" type="float" value="0.01" label="P-value threshold to detect marker regions"/>
+        <expand macro="param_gene_anno"/>
+        <expand macro="param_genome_fasta"/>
+        <param name="upstream" type="integer" value="250000" min="0" label="Upstream extension to the transcription start site"/>
+        <param name="downstream" type="integer" value="250000" min="0" label="Downstream extension to the transcription start site"/>
+        <param name="id_type" type="select" label="ID type of the gene in the annotation file">
+            <option value="gene_name">Gene name</option>
+            <option value="gene_id">Gene ID</option>
+            <option value="transcript_id">Transcript ID</option>
+        </param>
+        <param name="coding_gene_only" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Retain only coding genes in the network"/>
+        <param name="method" type="select" label="Regression model">
+            <!-- <option value="elastic_net">elastic_net</option> has problem with sparse matrix-->
+            <option value="gb_tree">gb_tree</option>
+        </param>
+        <param name="scale_X" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Whether to scale the features"/>
+        <param name="scale_Y" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Whether to scale the response variable"/>
+        <param name="alpha" type="float" value="1.5" label="Constant that multiplies the penalty terms in ‘elastic_net’"/>
+        <param name="l1_ratio" type="float" min="0" max="1" value="0.5" label="L1 penalty ratio" help="If set to 0, L2 penalty is used, if set to 1, L1 penalty is used, and if set between 0 and 1, a combination of both penalties is used."/>
+        <expand macro="param_meme_table"/>
+        <param argument="pvalue_tf" type="float" value="0.01" label="P-value threshold to detect TF binding"/>
+        <expand macro="param_render_plot"/>
+        <expand macro="param_common_advanced"/>
+    </inputs>
+    <!-- Outputs: the log file is only produced when show_log is enabled. The three collections all gather the files
+         found in network_dir/ and differ only in the out_file selection that enables them (svg, html, or anything else) -->
+    <outputs>
+        <data name="hidden_output" format="txt" label="Log file" >
+            <filter>advanced_common['show_log']</filter>
+        </data>
+        <collection name="out_network" type="list" label="${tool.name} (${method}) on ${on_string} - Network">
+            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
+            <filter>out_file != 'svg' and out_file != 'html'</filter>
+        </collection>
+        <collection name="out_network_svg" type="list" label="${tool.name} (${method}) on ${on_string} - Network - svg">
+            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
+            <filter>out_file == 'svg'</filter>
+        </collection>
+        <collection name="out_network_html" type="list" label="${tool.name} (${method}) on ${on_string} - Network - html">
+            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
+            <filter>out_file == 'html'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- network analysis with built-in (cached) GFF and FASTA -->
+        <test expect_num_outputs="2">
+            <param name="adata_rna" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz"/>
+            <param name="adata_atac" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
+            <param name="groupby" value="cell_type"/>
+            <param name="pvalue_marker" value="0.1"/>
+            <conditional name="gff_file_condi">
+                <param name="gffSource" value="cached"/>
+                <param name="gff_pre_installed" value="hg38"/>
+            </conditional>
+            <conditional name="fasta_file_condi">
+                <param name="fastaSource" value="indexed"/>
+                <param name="fasta_pre_installed" value="hg38"/>
+            </conditional>
+            <param name="out_file" value="png"/>
+            <expand macro="test_param_render_plot"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.tl.marker_regions"/>
+                    <has_text_matching expression="snap.tl.init_network_from_annotation"/>
+                    <has_text_matching expression="snap.tl.add_cor_scores"/>
+                    <has_text_matching expression="snap.tl.add_regr_scores"/>
+                    <has_text_matching expression="add_tf_binding"/>
+                    <!-- <has_text_matching expression="snap.tl.link_tf_to_gene"/> -->
+                    <has_text_matching expression="snap.pl.network_edge_stat"/>
+                    <has_text_matching expression="groupby = 'cell_type'"/>
+                    <expand macro="test_render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output_collection name="out_network" type="list" count="8">
+                <element name="plot_network_cluster_CD14 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD16 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 TCM">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD8 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_NK">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_Treg">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_cDC">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- network analysis with GFF and FASTA from history -->
+        <test expect_num_outputs="2">
+            <param name="adata_rna" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz"/>
+            <param name="adata_atac" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
+            <param name="groupby" value="cell_type"/>
+            <param name="pvalue_marker" value="0.1"/>
+            <conditional name="gff_file_condi">
+                <param name="gffSource" value="history"/>
+                <param name="gff_history" location="https://zenodo.org/records/17512085/files/chr21.gff3.gz"/>
+            </conditional>
+            <conditional name="fasta_file_condi">
+                <param name="fastaSource" value="history"/>
+                <param name="fasta_history" location="https://zenodo.org/records/17512085/files/chr21.fasta.gz"/>
+            </conditional>
+            <param name="out_file" value="png"/>
+            <expand macro="test_param_render_plot"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.tl.marker_regions"/>
+                    <has_text_matching expression="snap.tl.init_network_from_annotation"/>
+                    <has_text_matching expression="snap.tl.add_cor_scores"/>
+                    <has_text_matching expression="snap.tl.add_regr_scores"/>
+                    <has_text_matching expression="add_tf_binding"/>
+                    <!-- <has_text_matching expression="snap.tl.link_tf_to_gene"/> -->
+                    <has_text_matching expression="snap.pl.network_edge_stat"/>
+                    <has_text_matching expression="groupby = 'cell_type'"/>
+                    <expand macro="test_render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output_collection name="out_network" type="list" count="8">
+                <element name="plot_network_cluster_CD14 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD16 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 TCM">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD8 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_NK">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_Treg">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_cDC">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Build CRE-gene network from gene annotations, using `SnapATAC2`
+===============================================================
+
+Link CREs to genes if they are close to genes’ promoter regions.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.init_network_from_annotation.html>`__
+
+
+Compute correlation scores for any two connected nodes in the network.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_cor_scores.html>`__
+
+
+Perform regression analysis for nodes and their parents in the network.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_regr_scores.html>`__
+
+
+Add TF motif binding information.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_tf_binding.html>`__
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,1 @@
+hg38	hg38	Human (hg38)	${__HERE__}/chr21_small.fasta.gz
\ No newline at end of file
Binary file test-data/chr21.gff3.gz has changed
Binary file test-data/chr21_small.fasta.gz has changed
Binary file test-data/cisBP_human.meme.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_sets.loc	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,1 @@
+hg38	hg38	hg38GFF	${__HERE__}/chr21.gff3.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme.loc	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,1 @@
+cisbp	snap.datasets.cis_bp(unique=True)	${__HERE__}/cisBP_human.meme.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,17 @@
+#This file lists the locations and dbkeys of all the genome and transcriptome fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel4.5	apiMel4.5	Honeybee (Apis mellifera): apiMel4.5	/path/to/genome/apiMel4.5/apiMel4.5.fa
+#hg38canon	hg38	Human (Homo sapiens): hg38 Canonical	/path/to/genome/hg38/hg38canon.fa
+#hg38full	hg38	Human (Homo sapiens): hg38 Full	/path/to/genome/hg38/hg38full.fa
+#hg38full.90	hg38	Human (Homo sapiens): hg38 Full Trans v90	/path/to/genome/hg38/hg38fulltrans.fa
+
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg38 above.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gene_sets.loc.sample	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,14 @@
+# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format.
+#
+# The gene_sets.loc file syntax is:
+#<unique_build_id>	<dbkey>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# In case you have TWO or MORE providers PER dbkey, the one mentioned
+# first in the file, should have the "default" priority.
+#
+#Example:
+#
+#Homo_sapiens.GRCh38.90	hg38	GRCh38 (hg38) annotation from Ensembl, release 90	/depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf
+#Homo_sapiens.GRCh37.87	hg19	GRCh37 (hg19) annotation from Ensembl, release 87	/depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/meme.loc.sample	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,13 @@
+# This is a sample file distributed with snapatac2 which enables the tool to perform motif enrichment analysis
+#
+# The meme.loc file syntax is:
+#<unique_id>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# Currently the files should be downloaded manually
+#
+#Example:
+#
+#cisbp	cis_bp(unique=True)	/path/to/cisBP_human.meme.gz
+#meuleman_2020	Meuleman_2020	/path/to/Meuleman_2020.meme.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Genome FASTA files; each all_fasta.loc row gives value, dbkey, name and path -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc"/>
+    </table>
+    <!-- Gene annotation files per genome build; same four columns as all_fasta -->
+    <table name="gene_sets" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gene_sets.loc"/>
+    </table>
+    <!-- Motif (meme) files; rows carry value, name and path only (no dbkey column) -->
+    <table name="meme" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/meme.loc"/>
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,14 @@
+<tables>
+    <table name="all_fasta" comment_char="#"><!-- rows come from test-data/all_fasta.loc -->
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc"/>
+    </table>
+    <table name="gene_sets" comment_char="#"><!-- rows come from test-data/gene_sets.loc -->
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/gene_sets.loc"/>
+    </table>
+    <table name="meme" comment_char="#"><!-- rows come from test-data/meme.loc; no dbkey column -->
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/meme.loc"/>
+    </table>
+</tables>
\ No newline at end of file