Mercurial > repos > iuc > snapatac2_metrics

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,256 @@
+<macros>
+    <!-- Version tokens: @TOOL_VERSION@ pins the snapatac2 package and, together with @VERSION_SUFFIX@, forms the tool version string; @PROFILE@ is the Galaxy profile the tool declares. -->
+    <token name="@TOOL_VERSION@">2.8.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">24.0</token>
+    <!-- Cross-reference to the bio.tools registry entry; tools pull it in with an expand of the "xrefs" macro. -->
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">snapatac</xref>
+        </xrefs>
+    </xml>
+    <!-- Packages needed at runtime. Each package is listed exactly once; a tool can append its own requirements through the yield. -->
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
+        <requirement type="package" version="0.8.37">hdbscan</requirement>
+        <requirement type="package" version="0.10.2">leidenalg</requirement>
+        <requirement type="package" version="0.5.7">umap-learn</requirement>
+        <requirement type="package" version="3.0.4">xgboost</requirement>
+        <requirement type="package" version="0.2.1">python-kaleido</requirement>
+        <requirement type="package" version="1.31.0">polars</requirement>
+        <requirement type="package" version="5.24.1">plotly</requirement>
+        <requirement type="package" version="0.0.10">harmonypy</requirement>
+        <requirement type="package" version="1.7.4">scanorama</requirement>
+        <yield />
+    </xml>
+
+    <!-- command section -->
+    <!-- Stages the selected AnnData input ($method.adata) in the working directory under the fixed name anndata.h5ad, which is the name the generated script reads. -->
+    <token name="@CMD_PREP_ADATA@"><![CDATA[
+        ## ln -s does not work here
+        cp '$method.adata' 'anndata.h5ad' &&
+    ]]></token>
+    <!-- Main command: copies the rendered script into the log output, runs it with python while appending its stdout to the same log, then prints anndata_info.txt (created empty by touch if the script did not write it) through the prettifying pipeline. -->
+    <token name="@CMD@"><![CDATA[
+        cat '$script_file' > '$hidden_output' &&
+        python '$script_file' >> '$hidden_output' &&
+        touch 'anndata_info.txt' &&
+        cat 'anndata_info.txt' @CMD_PRETTIFY_STDOUT@
+    ]]></token>
+    <!-- sed pipeline appended to a cat: (1) on the first line rewrites "AnnData object with X = Y" to "X: Y"; (2) strips single quotes; (3) turns each "key: value" line into "[key]" followed by an indented "-    value" line; (4) breaks comma-separated values onto their own "-    " lines. -->
+    <token name="@CMD_PRETTIFY_STDOUT@"><![CDATA[
+        | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
+    ]]></token>
+    <!-- Symlinks the gene annotation into the working directory as "gff": the path column of the selected data table entry when gffSource is "cached", otherwise the history dataset. -->
+    <token name="@CMD_GET_GFF@"><![CDATA[
+        #if $method.gff_file_condi.gffSource == 'cached':
+            ln -s '$method.gff_file_condi.gff_pre_installed.fields.path' gff &&
+        #else:
+            ln -s '$method.gff_file_condi.gff_history' gff &&
+        #end if
+    ]]></token>
+    <!-- Provides the genome as fasta.fa in the working directory. Built-in entries are streamed through "gzip -dcf" so that both gzip-compressed and plain FASTA files listed in the all_fasta table work; history datasets are decompressed when their datatype ends in .gz and symlinked otherwise. -->
+    <token name="@CMD_GET_FASTA@"><![CDATA[
+        #if $method.fasta_file_condi.fastaSource == 'indexed':
+            ## -f lets gzip copy input that is not gzip-compressed unchanged to stdout
+            gzip -dcf '$method.fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
+            echo "Using built-in FASTA: '$method.fasta_file_condi.fasta_pre_installed.fields.name'" >&2 &&
+        #else:
+            #if $method.fasta_file_condi.fasta_history.ext.endswith('.gz'):
+                zcat '$method.fasta_file_condi.fasta_history' > fasta.fa &&
+            #else:
+                ln -s '$method.fasta_file_condi.fasta_history' fasta.fa &&
+            #end if
+        #end if
+    ]]></token>
+
+    <!-- Config section -->
+    <!-- Python imports placed at the top of every generated script: snapatac2 (as snap) and os. -->
+    <token name="@CONF_IMPORTS@"><![CDATA[
+import snapatac2 as snap
+import os
+    ]]></token>
+    <!-- Reads the staged anndata.h5ad with snap.read, passing backed = None, and binds the result to "adata". -->
+    <token name="@CONF_READ_INPUTS@"><![CDATA[
+adata = snap.read('anndata.h5ad', backed = None)
+    ]]></token>
+    <!-- Writes the result with write_h5ad(compression='gzip') to a file named anndata.h5ad.gz, and prints the AnnData summary into anndata_info.txt, which the command section later prettifies. -->
+    <token name="@CONF_ANNDATA_WRITE_OUTPUTS@"><![CDATA[
+adata.write_h5ad('anndata.h5ad.gz', compression='gzip')
+with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
+    print(adata, file=ainfo)
+    ]]></token>
+    <!-- Keyword arguments shared by plotting calls: width and height from the form, show and interactive disabled, and the figure written to plot.EXT where EXT is the selected out_file value. -->
+    <token name="@CONF_PARAMS_RENDER_PLOT@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    show = False,
+    interactive = False,
+    out_file = 'plot.$method.out_file',
+    ]]></token>
+    <!-- Keyword arguments shared by data integration calls. use_rep is always emitted; use_dims and groupby are split on commas into Python lists when non-empty; key_added is emitted only when non-empty. -->
+    <!-- NOTE(review): use_dims is rendered as a list of strings; confirm the called functions accept string entries rather than integers. -->
+    <token name="@CONF_PARAMS_DATA_INTEGRATION@"><![CDATA[
+use_rep = '$method.use_rep',
+#if $method.use_dims != ''
+#set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+use_dims=$dims,
+#end if
+#if $method.groupby != ''
+#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
+groupby=$groupby,
+#end if
+#if $method.key_added != ''
+key_added = '$method.key_added',
+#end if
+    ]]></token>
+    <!-- Loads motifs from input.meme; read_motifs must be in scope in the generated script. The first branch derives the name from the id up to '+' and keeps, per name, the motif with the highest information content; the second branch derives name and family from the id. -->
+    <!-- NOTE(review): the original token had a dangling "#else:" without an opening "#if" or closing "#end if". The condition below assumes the first branch is meant for the meme table entry whose value is 'cisbp' (param "motifs" from param_meme_table); confirm against the tools that expand this token. -->
+    <token name="@CONF_IMPORT_MEME@"><![CDATA[
+#if str($method.motifs) == 'cisbp':
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('+')[0]
+
+unique_motifs = {}
+for motif in motifs:
+    name = motif.name
+    if (
+            name not in unique_motifs or
+            unique_motifs[name].info_content() < motif.info_content()
+        ):
+        unique_motifs[name] = motif
+motifs = list(unique_motifs.values())
+#else:
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('_')[0]
+    motif.family = motif.id.split('+')[-1]
+#end if
+    ]]></token>
+
+    <!-- input section -->
+    <!-- Sanitizer for free-text parameters: starts from the character set named by the validinitial token (string.printable by default), removes the single quote, and lets the caller add further rules inside the valid element through the yield. -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;" />
+                <yield/>
+            </valid>
+        </sanitizer>
+    </xml>
+
+    <!-- AnnData (h5ad) input; label and multiplicity are overridable through tokens. -->
+    <xml name="param_inputs_anndata" token_multiple="false" token_label="Annotated data matrix">
+        <param
+            name="adata"
+            type="data"
+            format="h5ad"
+            multiple="@MULTIPLE@"
+            label="@LABEL@"/>
+    </xml>
+    <!-- Free-text observation key used for grouping, passed through the shared sanitizer. -->
+    <xml name="param_groupby">
+        <param
+            argument="groupby"
+            type="text"
+            label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query"/>
+        </param>
+    </xml>
+    <!-- Collapsed advanced section holding the boolean that the tool's log output filter reads. -->
+    <xml name="param_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param
+                name="show_log"
+                type="boolean"
+                checked="false"
+                label="Output Log?"/>
+        </section>
+    </xml>
+    <!-- Plot geometry and output type; the selected out_file value becomes the extension of the plot file written by the script. -->
+    <xml name="param_render_plot">
+        <param
+            argument="width"
+            type="integer"
+            value="600"
+            label="Width of the plot"/>
+        <param
+            argument="height"
+            type="integer"
+            value="400"
+            label="Height of the plot"/>
+        <param
+            name="out_file"
+            type="select"
+            optional="true"
+            label="Type of output plot">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+            <option value="html">HTML</option>
+        </param>
+    </xml>
+    <!-- Integer shift parameter; the caller supplies the argument name through the varname token and may override value and label. -->
+    <xml name="param_shift" tokens="varname" token_value="0" token_label="Insertion site correction for the left end">
+        <param
+            argument="@VARNAME@"
+            type="integer"
+            value="@VALUE@"
+            label="@LABEL@"
+            help="Note this has no effect on single-end reads"/>
+    </xml>
+    <!-- Integer chunk size; the default comes from the size token. -->
+    <xml name="param_chunk_size" tokens="size">
+        <param
+            argument="chunk_size"
+            type="integer"
+            value="@SIZE@"
+            label="chunk size"/>
+    </xml>
+    <!-- Optional lower and upper fragment size bounds; both are empty by default. -->
+    <xml name="param_min_max_frag_size">
+        <param
+            argument="min_frag_size"
+            type="integer"
+            optional="true"
+            value=""
+            label="Minimum fragment size to include"/>
+        <param
+            argument="max_frag_size"
+            type="integer"
+            optional="true"
+            value=""
+            label="Maximum fragment size to include"/>
+    </xml>
+    <!-- Form fields matching the @CONF_PARAMS_DATA_INTEGRATION@ token: representation key, optional dimensions, optional grouping key and optional result key. -->
+    <xml name="param_data_integration">
+        <param
+            argument="use_rep"
+            type="text"
+            value="X_spectral"
+            label="The key for the matrix"/>
+        <param
+            argument="use_dims"
+            type="text"
+            optional="true"
+            value=""
+            label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param
+            argument="groupby"
+            type="text"
+            optional="true"
+            value=""
+            label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param
+            argument="key_added"
+            type="text"
+            optional="true"
+            value=""
+            label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
+    <!-- Integer random seed defaulting to 0; label and help are overridable through tokens. -->
+    <xml name="param_random_state" token_label="Seed of the random state generator" token_help="">
+        <param
+            argument="random_state"
+            type="integer"
+            value="0"
+            label="@LABEL@"
+            help="@HELP@"/>
+    </xml>
+    <!-- Text field for the adata.obs key receiving cluster labels; the default comes from the key_added token. -->
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>
+    </xml>
+    <!-- Text field naming the representation to use, defaulting to X_spectral; the label is overridable through a token. -->
+    <xml name="param_use_rep" token_label="Use the indicated representation in `.obsm`">
+        <param
+            argument="use_rep"
+            type="text"
+            value="X_spectral"
+            label="@LABEL@"/>
+    </xml>
+    <!-- Iteration count for Leiden clustering, defaulting to -1. -->
+    <xml name="param_n_iterations">
+        <param
+            argument="n_iterations"
+            type="integer"
+            value="-1"
+            label="How many iterations of the Leiden clustering algorithm to perform"
+            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+    <!-- Select for the feature counting strategy; "paired-insertion" is preselected. -->
+    <xml name="param_counting_strategy">
+        <param
+            argument="counting_strategy"
+            type="select"
+            label="The strategy to compute feature counts">
+            <option value="fragment">"fragment": based on the number of fragments that overlap with a region of interest</option>
+            <option value="insertion">"insertion": based on the number of insertions that overlap with a region of interest</option>
+            <option value="paired-insertion" selected="true">"paired-insertion": similar to "insertion", but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
+        </param>
+    </xml>
+    <!-- Tabular chromosome sizes input. -->
+    <xml name="param_chrom_sizes">
+        <param
+            argument="chrom_sizes"
+            type="data"
+            format="tabular"
+            label="Chromosome sizes"
+            help="First column the chromosome name and second column the size"/>
+    </xml>
+    <!-- Genome FASTA chooser matching the @CMD_GET_FASTA@ token: either an entry of the all_fasta data table or a fasta / fasta.gz dataset from the history. -->
+    <xml name="param_genome_fasta">
+        <conditional name="fasta_file_condi">
+            <param
+                name="fastaSource"
+                type="select"
+                label="Select a built-in FASTA or one from your history"
+                help="Choose history if you don't see the correct FASTA.">
+                <option value="indexed" selected="true">Use a built-in FASTA</option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="indexed">
+                <param
+                    name="fasta_pre_installed"
+                    type="select"
+                    label="Select a FASTA file"
+                    help="Select the FASTA file from a list of pre-installed genomes">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param
+                    name="fasta_history"
+                    type="data"
+                    format="fasta,fasta.gz"
+                    label="FASTA file" />
+            </when>
+        </conditional>
+    </xml>
+    <!-- Gene annotation chooser matching the @CMD_GET_GFF@ token: either an entry of the gene_sets data table or a gff3.gz dataset from the history. -->
+    <xml name="param_gene_anno">
+        <conditional name="gff_file_condi">
+            <param
+                name="gffSource"
+                type="select"
+                label="Select a built-in GFF file or one from your history"
+                help="Choose history if you don't see the correct GFF">
+                <option value="cached" selected="true">Use a built-in GFF</option>
+                <option value="history">Use a GFF from history</option>
+            </param>
+            <when value="cached">
+                <param
+                    name="gff_pre_installed"
+                    type="select"
+                    label="Select a GFF file"
+                    help="Select the GFF from a list of pre-installed files">
+                    <options from_data_table="gene_sets">
+                        <filter type="sort_by" column="1" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param
+                    name="gff_history"
+                    type="data"
+                    format="gff3.gz"
+                    label="Select a GFF file"
+                    help="Make sure that the GFF corresponds to the same genome as the FASTA"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Integer number of components; default value, label and help are overridable through tokens. -->
+    <xml name="param_n_comps" token_value="30" token_label="Number of dimensions to keep" token_help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30.">
+        <param
+            argument="n_comps"
+            type="integer"
+            value="@VALUE@"
+            label="@LABEL@"
+            help="@HELP@"/>
+    </xml>
+    <!-- Select listing the entries of the meme data table, sorted by its column 2. -->
+    <xml name="param_meme_table">
+        <param
+            name="motifs"
+            type="select"
+            label="Select list of transcription factor motifs">
+            <options from_data_table="meme">
+                <filter type="sort_by" column="2" />
+            </options>
+        </param>
+    </xml>
+
+
+    <!-- test section -->
+    <!-- Test inputs overriding the default plot size (650 x 450). -->
+    <xml name="test_param_render_plot">
+        <param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+    <!-- Matching assertions checking that the overridden plot size appears in the rendered script. -->
+    <xml name="test_render_plot_matching_text">
+        <has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+
+
+    <!-- Citation block (one DOI) expanded at the end of each tool. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metrics.xml	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,316 @@
+<tool id="snapatac2_metrics" name="SnapATAC2 Metrics" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>and quality control</description>
+    <!-- Version tokens, requirements, command/config snippets and parameter macros are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <!-- Command line: links the gene annotation only for metrics.tsse, stages the AnnData input, then runs the generated script and prints the prettified AnnData summary. Failure is detected from the exit code. -->
+    <command detect_errors="exit_code"><![CDATA[
+#if $method.method == 'metrics.tsse':
+@CMD_GET_GFF@
+#end if
+@CMD_PREP_ADATA@
+@CMD@
+    ]]></command>
+    <!-- Generated Python script: reads the staged AnnData, runs exactly one snap.metrics function chosen by $method.method (using GALAXY_SLOTS, default 4, as n_jobs), then writes the AnnData and its summary. For summary_by_chrom the returned dictionary is also stored in adata.uns and exported as a tab-separated table. -->
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CONF_IMPORTS@
+@CONF_READ_INPUTS@
+
+#if $method.method == 'metrics.frag_size_distr'
+snap.metrics.frag_size_distr(
+    adata,
+    max_recorded_size = $method.max_recorded_size,
+    add_key = '$method.add_key',
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'metrics.tsse'
+snap.metrics.tsse(
+    adata,
+    gene_anno = 'gff',
+    #if $method.exclude_chroms != ''
+    #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')])
+    exclude_chroms = $excl_chroms,
+    #end if
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'metrics.frip'
+snap.metrics.frip(
+    adata,
+    #set $regions_dict = {}
+    #for $region in $method.regions_repeat
+        #silent $regions_dict.__setitem__(str($region.region_name), str($region.region_bed))
+    #end for
+    regions = $regions_dict,
+    normalized = $method.normalized,
+    count_as_insertion = $method.count_as_insertion,
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'metrics.summary_by_chrom'
+import pandas as pd
+
+summary_dict = snap.metrics.summary_by_chrom(
+    adata,
+    mode = '$method.mode',
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+adata.uns['summary_by_chrom'] = summary_dict
+
+summary_df = pd.DataFrame.from_dict(summary_dict, orient='index')
+summary_df.index.name = 'chrom'
+summary_df.to_csv('summary_by_chrom.tabular', sep='\t')
+
+#end if
+@CONF_ANNDATA_WRITE_OUTPUTS@
+    ]]></configfile>
+    </configfiles>
+    <!-- Form definition: the "method" conditional selects one snap.metrics function and exposes its parameters. For FRiP at least one region is required and its name must not be empty, because the name becomes a key of the regions dictionary passed to the function. -->
+    <inputs>
+        <conditional name="method">
+            <param name="method" type="select" label="Method used for metrics">
+                <option value="metrics.frag_size_distr">Compute the fragment size distribution of the dataset, using 'metrics.frag_size_distr'</option>
+                <option value="metrics.tsse">Compute the TSS enrichment score (TSSe) for each cell, using 'metrics.tsse'</option>
+                <option value="metrics.frip">Add fraction of reads in peaks (FRiP) to the AnnData object, using 'metrics.frip'</option>
+                <option value="metrics.summary_by_chrom">Compute the cell level summary statistics by chromosome, using 'metrics.summary_by_chrom'</option>
+            </param>
+            <when value="metrics.frag_size_distr">
+                <expand macro="param_inputs_anndata"/>
+                <param argument="max_recorded_size" type="integer" min="1" value="1000" label="The maximum fragment size to record in the result"/>
+                <param argument="add_key" type="text" value="frag_size_distr" label="Key used to store the result in `adata.uns`"/>
+            </when>
+            <when value="metrics.tsse">
+                <expand macro="param_inputs_anndata"/>
+                <expand macro="param_gene_anno"/>
+                <param argument="exclude_chroms" type="text" value="chrM, M" optional="true" label="A list of chromosomes to exclude">
+                    <expand macro="sanitize_query"/>
+                </param>
+            </when>
+            <when value="metrics.frip">
+                <expand macro="param_inputs_anndata"/>
+                <repeat name="regions_repeat" title="Regions" min="1" help="Creates a dictionary of regions to compute FRiP for.">
+                    <param argument="region_name" type="text" value="" label="Name of the region" help="A region can be a promoter or enhancer and so on">
+                        <validator type="empty_field"/>
+                        <expand macro="sanitize_query">
+                            <remove value="'"/>
+                            <remove value='"'/>
+                            <remove value=" "/>
+                        </expand>
+                    </param>
+                    <param argument="region_bed" type="data" format="bed" label="Regions to compute FRiP for"/>
+                </repeat>
+                <param argument="normalized" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to normalize the FRiP value by the total number of fragments"/>
+                <param argument="count_as_insertion" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to count transposition events instead of fragments"/>
+            </when>
+            <when value="metrics.summary_by_chrom">
+                <expand macro="param_inputs_anndata"/>
+                <param argument="mode" type="select" label="The summary statistics to compute">
+                    <option value="sum">Sum</option>
+                    <option value="mean">Mean</option>
+                    <option value="count">Count</option>
+                </param>
+            </when>
+        </conditional>
+        <expand macro="param_common_advanced"/>
+    </inputs>
+    <!-- Outputs: the updated AnnData is always collected; the per-chromosome table only for metrics.summary_by_chrom; the log only when the show_log switch is enabled. -->
+    <outputs>
+        <data
+            name="anndata_out"
+            format="h5ad"
+            from_work_dir="anndata.h5ad.gz"
+            label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
+        <data
+            name="summary_by_chrom"
+            format="tabular"
+            from_work_dir="summary_by_chrom.tabular"
+            label="${tool.name} (${method.method}) on ${on_string}: Summary by chromosome">
+            <filter>method['method'] == 'metrics.summary_by_chrom'</filter>
+        </data>
+        <data
+            name="hidden_output"
+            format="txt"
+            label="Log file">
+            <filter>advanced_common['show_log']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- metrics.frag_size_distr: checks the rendered call in the log and the uns/frag_size_distr key in the output -->
+        <test expect_num_outputs="2">
+            <conditional name="method">
+                <param name="method" value="metrics.frag_size_distr"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.import_fragment.h5ad"/>
+                <param name="max_recorded_size" value="500"/>
+                <param name="add_key" value="frag_size_distr"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.metrics.frag_size_distr"/>
+                    <has_text_matching expression="add_key = 'frag_size_distr'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out">
+                <assert_contents>
+                    <has_h5_keys keys="uns/frag_size_distr"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- metrics.tsse with a GFF supplied from the history: checks the rendered call in the log and the TSSe keys in obs and uns -->
+        <test expect_num_outputs="2">
+            <conditional name="method">
+                <param name="method" value="metrics.tsse"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.import_fragment.h5ad"/>
+                <conditional name="gff_file_condi">
+                    <param name="gffSource" value="history"/>
+                    <param name="gff_history" location="https://zenodo.org/records/17512085/files/chr21.gff3.gz"/>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.metrics.tsse"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out">
+                <assert_contents>
+                    <has_h5_keys keys="obs/tsse"/>
+                    <has_h5_keys keys="uns/library_tsse,uns/frac_overlap_TSS,uns/TSS_profile"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- metrics.tsse with the built-in (cached) GFF: selects the hg38 entry of the gene_sets test data table and runs the same checks as the history variant -->
+        <test expect_num_outputs="2">
+            <conditional name="method">
+                <param name="method" value="metrics.tsse"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.import_fragment.h5ad"/>
+                <conditional name="gff_file_condi">
+                    <param name="gffSource" value="cached"/>
+                    <param name="gff_pre_installed" value="hg38"/>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.metrics.tsse"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out">
+                <assert_contents>
+                    <has_h5_keys keys="obs/tsse"/>
+                    <has_h5_keys keys="uns/library_tsse,uns/frac_overlap_TSS,uns/TSS_profile"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- metrics.frip with a single named region: checks the rendered arguments in the log and the obs column named after the region -->
+        <test expect_num_outputs="2">
+            <conditional name="method">
+                <param name="method" value="metrics.frip"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.import_fragment.h5ad"/>
+                <repeat name="regions_repeat">
+                    <param name="region_bed" location="https://zenodo.org/records/17512085/files/cre_hea.bed"/>
+                    <param name="region_name" value="peaks_frac"/>
+                </repeat>
+                <param name="normalized" value="true"/>
+                <param name="count_as_insertion" value="false"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.metrics.frip"/>
+                    <has_text_matching expression="regions = {'peaks_frac':"/>
+                    <has_text_matching expression="normalized = True"/>
+                    <has_text_matching expression="count_as_insertion = False"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out">
+                <assert_contents>
+                    <has_h5_keys keys="obs/peaks_frac"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- metrics.summary_by_chrom in "sum" mode: three outputs are expected because the tabular summary is produced in addition to the AnnData and the log -->
+        <test expect_num_outputs="3">
+            <conditional name="method">
+                <param name="method" value="metrics.summary_by_chrom"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.import_fragment.h5ad"/>
+                <param name="mode" value="sum"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.metrics.summary_by_chrom"/>
+                    <has_text_matching expression="mode = 'sum'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out">
+                <assert_contents>
+                    <has_h5_keys keys="uns/summary_by_chrom"/>
+                </assert_contents>
+            </output>
+            <output name="summary_by_chrom">
+                <assert_contents>
+                    <has_text_matching expression="chrom\t0\t1\t2\t3"/>
+                    <has_text_matching expression="chr5\t72.0\t126.0"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Compute the fragment size distribution of the dataset, using `metrics.frag_size_distr`
+==========================================================================================
+
+Compute the fragment size distribution of the dataset.
+
+This function computes the fragment size distribution of the dataset. Note that it does not operate at the single-cell level. The result is stored in a vector where each element represents the number of fragments and the index represents the fragment length. The first position of the vector is reserved for fragments with size larger than the `max_recorded_size` parameter.
+
+`import_fragments` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.metrics.frag_size_distr.html>`__
+
+
+Compute the TSS enrichment score (TSSe) for each cell, using `metrics.tsse`
+==========================================================================================
+
+Compute the TSS enrichment score (TSSe) for each cell.
+
+`import_fragments` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.metrics.tsse.html>`__
+
+
+Add fraction of reads in peaks (FRiP) to the AnnData object, using `metrics.frip`
+==========================================================================================
+
+Compute the fraction of reads in peaks (FRiP) for each cell.
+
+`import_fragments` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.metrics.frip.html>`__
+
+
+Compute the cell level summary statistics by chromosome, using `metrics.summary_by_chrom`
+==========================================================================================
+
+Compute the cell level summary statistics by chromosome.
+
+This function computes the cell level summary statistics by chromosome. The result is stored in the `adata.uns['summary_by_chrom']` dictionary, where each key is a chromosome and the value is
+a dictionary with the summary statistics for that chromosome. A tabular file with the summary statistics is also generated.
+
+`import_fragments` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.metrics.summary_by_chrom.html>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,1 @@
+hg38	hg38	Human (hg38)	${__HERE__}/chr21_small.fasta.gz
\ No newline at end of file
Binary file test-data/chr21.gff3.gz has changed
Binary file test-data/chr21_small.fasta.gz has changed
Binary file test-data/cisBP_human.meme.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_sets.loc	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,1 @@
+hg38	hg38	hg38GFF	${__HERE__}/chr21.gff3.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme.loc	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,1 @@
+cisbp	snap.datasets.cis_bp(unique=True)	${__HERE__}/cisBP_human.meme.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,17 @@
+#This file lists the locations and dbkeys of all the genome and transcriptome fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel4.5	apiMel4.5	Honeybee (Apis mellifera): apiMel4.5	/path/to/genome/apiMel4.5/apiMel4.5.fa
+#hg38canon	hg38	Human (Homo sapiens): hg38 Canonical	/path/to/genome/hg38/hg38canon.fa
+#hg38full	hg38	Human (Homo sapiens): hg38 Full	/path/to/genome/hg38/hg38full.fa
+#hg38full.90	hg38	Human (Homo sapiens): hg38 Full Trans v90	/path/to/genome/hg38/hg38fulltrans.fa
+
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg38 above.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gene_sets.loc.sample	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,14 @@
+# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format.
+#
+# The gene_sets.loc file syntax is:
+#<unique_build_id>	<dbkey>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# In case you have TWO or MORE providers PER dbkey, the one mentioned
+# first in the file, should have the "default" priority.
+#
+#Example:
+#
+#Homo_sapiens.GRCh38.90	hg38	GRCh38 (hg38) annotation from Ensembl, release 90	/depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf
+#Homo_sapiens.GRCh37.87	hg19	GRCh37 (hg19) annotation from Ensembl, release 87	/depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/meme.loc.sample	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,13 @@
+# This is a sample file distributed with snapatac2 which enables the tool to perform motif enrichment analysis
+#
+# The meme.loc file syntax is:
+#<unique_id>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# Currently the files should be downloaded manually
+#
+#Example:
+#
+#cisbp	cis_bp(unique=True)	/path/to/cisBP_human.meme.gz
+#meuleman_2020	Meuleman_2020	/path/to/Meuleman_2020.meme.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Genome FASTA files; columns: value, dbkey, name, path. Read by the fasta_pre_installed select in macros.xml. -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- Gene annotation files per genome build; columns: value, dbkey, name, path. Read by the gff_pre_installed select in macros.xml. -->
+    <table name="gene_sets" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gene_sets.loc" />
+    </table>
+    <!-- Transcription factor motif files in MEME format; columns: value, name, path. Read by the motifs select in macros.xml. -->
+    <table name="meme" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/meme.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Nov 25 16:39:28 2025 +0000
@@ -0,0 +1,14 @@
+<tables>
+    <!-- Test variant of the data table configuration: same tables and columns as the sample file, but pointing at the .loc files shipped under test-data. -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <table name="gene_sets" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/gene_sets.loc" />
+    </table>
+    <table name="meme" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/meme.loc" />
+    </table>
+</tables>
\ No newline at end of file