Mercurial > repos > iuc > snapatac2_network
changeset 0:2c289e3b566a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/snapatac2 commit e0f59fae19e57f54ae0c351a16dd1805d12aba1d
| author | iuc |
|---|---|
| date | Tue, 25 Nov 2025 16:39:58 +0000 |
| parents | |
| children | |
| files | macros.xml network.xml test-data/all_fasta.loc test-data/chr21.gff3.gz test-data/chr21_small.fasta.gz test-data/cisBP_human.meme.gz test-data/gene_sets.loc test-data/meme.loc tool-data/all_fasta.loc.sample tool-data/gene_sets.loc.sample tool-data/meme.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
| diffstat | 13 files changed, 731 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,259 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.8.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">24.0</token>
+    <xml name="xrefs">
+        <xrefs>
+            <xref type="bio.tools">snapatac</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
+        <requirement type="package" version="0.8.37">hdbscan</requirement>
+        <requirement type="package" version="0.10.2">leidenalg</requirement>
+        <requirement type="package" version="0.5.7">umap-learn</requirement>
+        <requirement type="package" version="3.0.4">xgboost</requirement>
+        <requirement type="package" version="0.2.1">python-kaleido</requirement>
+        <requirement type="package" version="1.31.0">polars</requirement>
+        <requirement type="package" version="5.24.1">plotly</requirement>
+        <requirement type="package" version="0.0.10">harmonypy</requirement>
+        <requirement type="package" version="1.7.4">scanorama</requirement>
+        <yield />
+    </xml>
+
+    <!-- command section -->
+    <token name="@CMD_PREP_ADATA@"><![CDATA[
+        ## ln -s does not work here
+        cp '$method.adata' 'anndata.h5ad' &&
+    ]]></token>
+    <!-- Saves the rendered script and the script's stdout to $hidden_output, then prints a prettified anndata_info.txt -->
+    <token name="@CMD@"><![CDATA[
+        cat '$script_file' > '$hidden_output' &&
+        python '$script_file' >> '$hidden_output' &&
+        touch 'anndata_info.txt' &&
+        cat 'anndata_info.txt' @CMD_PRETTIFY_STDOUT@
+    ]]></token>
+    <token name="@CMD_PRETTIFY_STDOUT@"><![CDATA[
+        | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g" | sed -r 's|^\s*(.*):\s(.*)|[\1]\n- \2|g' | sed 's|, |\n- |g'
+    ]]></token>
+    <token name="@CMD_GET_GFF@"><![CDATA[
+        #if $method.gff_file_condi.gffSource == 'cached':
+            ln -s '$method.gff_file_condi.gff_pre_installed.fields.path' gff &&
+        #else:
+            ln -s '$method.gff_file_condi.gff_history' gff &&
+        #end if
+    ]]></token>
+    <token name="@CMD_GET_FASTA@"><![CDATA[
+        #if $method.fasta_file_condi.fastaSource == 'indexed':
+            zcat '$method.fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
+            echo "Using built-in FASTA: '$method.fasta_file_condi.fasta_pre_installed.fields.name'" >&2 &&
+        #else:
+            #if $method.fasta_file_condi.fasta_history.ext.endswith('.gz')
+                zcat '$method.fasta_file_condi.fasta_history' > fasta.fa &&
+            #else:
+                ln -s '$method.fasta_file_condi.fasta_history' fasta.fa &&
+            #end if
+        #end if
+    ]]></token>
+
+    <!-- Config section -->
+    <token name="@CONF_IMPORTS@"><![CDATA[
+import snapatac2 as snap
+import os
+    ]]></token>
+    <token name="@CONF_READ_INPUTS@"><![CDATA[
+adata = snap.read('anndata.h5ad', backed = None)
+    ]]></token>
+    <token name="@CONF_ANNDATA_WRITE_OUTPUTS@"><![CDATA[
+adata.write_h5ad('anndata.h5ad.gz', compression='gzip')
+with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
+    print(adata, file=ainfo)
+    ]]></token>
+    <token name="@CONF_PARAMS_RENDER_PLOT@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    show = False,
+    interactive = False,
+    out_file = 'plot.$method.out_file',
+    ]]></token>
+    <token name="@CONF_PARAMS_DATA_INTEGRATION@"><![CDATA[
+use_rep = '$method.use_rep',
+#if $method.use_dims != ''
+#set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+use_dims=$dims,
+#end if
+#if $method.groupby != ''
+#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
+groupby=$groupby,
+#end if
+#if $method.key_added != ''
+key_added = '$method.key_added',
+#end if
+    ]]></token>
+    <!-- NOTE: this token contains a dangling Cheetah "#else:". It must be expanded inside an
+         "#if ... #end if" block supplied by the caller. The part before "#else:" keeps one motif per
+         name (the one with the highest information content); the part after it keeps every motif. -->
+    <token name="@CONF_IMPORT_MEME@"><![CDATA[
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('+')[0]
+
+unique_motifs = {}
+for motif in motifs:
+    name = motif.name
+    if (
+            name not in unique_motifs or
+            unique_motifs[name].info_content() < motif.info_content()
+        ):
+        unique_motifs[name] = motif
+motifs = list(unique_motifs.values())
+
+
+#else:
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('_')[0]
+    motif.family = motif.id.split('+')[-1]
+    ]]></token>
+
+    <!-- input section -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="'" />
+                <yield/>
+            </valid>
+        </sanitizer>
+    </xml>
+
+    <xml name="param_inputs_anndata" token_multiple="false" token_label="Annotated data matrix">
+        <param name="adata" type="data" multiple="@MULTIPLE@" format="h5ad" label="@LABEL@"/>
+    </xml>
+    <xml name="param_groupby">
+        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+    </xml>
+    <xml name="param_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+        </section>
+    </xml>
+    <xml name="param_render_plot">
+        <param argument="width" type="integer" value="600" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+        <param name="out_file" type="select" optional="true" label="Type of output plot">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+            <option value="html">HTML</option>
+        </param>
+    </xml>
+    <xml name="param_shift" tokens="varname" token_value="0" token_label="Insertion site correction for the left end">
+        <param argument="@VARNAME@" type="integer" value="@VALUE@" label="@LABEL@" help="Note this has no effect on single-end reads"/>
+    </xml>
+    <xml name="param_chunk_size" tokens="size">
+        <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+    </xml>
+    <xml name="param_min_max_frag_size">
+        <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
+        <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
+    </xml>
+    <xml name="param_data_integration">
+        <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+        <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+        <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
+    <xml name="param_random_state" token_label="Seed of the random state generator" token_help="">
+        <param argument="random_state" type="integer" value="0" label="@LABEL@" help="@HELP@"/>
+    </xml>
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>
+    </xml>
+    <xml name="param_use_rep" token_label="Use the indicated representation in `.obsm`">
+        <param argument="use_rep" type="text" value="X_spectral" label="@LABEL@"/>
+    </xml>
+    <xml name="param_n_iterations">
+        <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"
+            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+    <xml name="param_counting_strategy">
+        <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">
+            <option value="fragment">"fragment": based on the number of fragments that overlap with a region of interest</option>
+            <option value="insertion">"insertion": based on the number of insertions that overlap with a region of interest</option>
+            <option value="paired-insertion" selected="true">"paired-insertion": similar to "insertion", but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
+        </param>
+    </xml>
+    <xml name="param_chrom_sizes">
+        <param argument="chrom_sizes" type="data" format="tabular" label="Chromosome sizes" help="First column the chromosome name and second column the size"/>
+    </xml>
+    <xml name="param_genome_fasta">
+        <conditional name="fasta_file_condi">
+            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA.">
+                <option value="indexed" selected="true">Use a built-in FASTA</option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed" type="select" label="Select a FASTA file" help="Select the FASTA file from a list of pre-installed genomes">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta,fasta.gz" label="FASTA file" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="param_gene_anno">
+        <conditional name="gff_file_condi">
+            <param name="gffSource" type="select" label="Select a built-in GFF file or one from your history" help="Choose history if you don't see the correct GFF" >
+                <option value="cached" selected="true">Use a built-in GFF</option>
+                <option value="history">Use a GFF from history</option>
+            </param>
+            <when value="cached">
+                <param name="gff_pre_installed" type="select" label="Select a GFF file" help="Select the GFF from a list of pre-installed files">
+                    <options from_data_table="gene_sets">
+                        <filter type="sort_by" column="1" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="gff_history" type="data" format="gff3.gz" label="Select a GFF file" help="Make sure that the GFF corresponds to the same genome as the FASTA"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="param_n_comps" token_value="30" token_label="Number of dimensions to keep" token_help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30.">
+        <param argument="n_comps" type="integer" value="@VALUE@" label="@LABEL@" help="@HELP@"/>
+    </xml>
+    <xml name="param_meme_table">
+        <param name="motifs" type="select" label="Select list of transcription factor motifs">
+            <options from_data_table="meme">
+                <filter type="sort_by" column="2" />
+            </options>
+        </param>
+    </xml>
+
+
+    <!-- test section -->
+    <xml name="test_param_render_plot">
+        <param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+    <xml name="test_render_plot_matching_text">
+        <has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/network.xml Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,406 @@
+<tool id="snapatac2_network" name="SnapATAC2 network" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>analysis</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        gunzip -c '$motifs.fields.path' > 'input.meme' &&
+        #if $gff_file_condi.gffSource == 'cached':
+            ln -s '$gff_file_condi.gff_pre_installed.fields.path' anno.gff &&
+        #else:
+            ln -s '$gff_file_condi.gff_history' anno.gff &&
+        #end if
+        #if $fasta_file_condi.fastaSource == 'indexed':
+            zcat '$fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
+        #elif $fasta_file_condi.fasta_history.ext.endswith('.gz'):
+            zcat '$fasta_file_condi.fasta_history' > fasta.fa &&
+        #else:
+            ## the history input also accepts uncompressed FASTA, on which zcat would fail
+            ln -s '$fasta_file_condi.fasta_history' fasta.fa &&
+        #end if
+        mkdir -p network_dir &&
+        cp '$adata_rna' 'anndata_rna.h5ad' &&
+        cp '$adata_atac' 'anndata_atac.h5ad' &&
+@CMD@
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CONF_IMPORTS@
+## import motifs
+from snapatac2._snapatac2 import read_motifs, PyDNAMotif
+#if $motifs == 'cisbp':
+@CONF_IMPORT_MEME@
+#end if
+## 'out_file' is an optional select: fall back to PNG when nothing is selected
+#if str($out_file) == 'None':
+#set $plot_ext = 'png'
+#else:
+#set $plot_ext = str($out_file)
+#end if
+rna = snap.read('anndata_rna.h5ad', backed = None)
+atac = snap.read('anndata_atac.h5ad', backed = None)
+
+marker_peaks = snap.tl.marker_regions(
+    atac,
+    groupby = '$groupby',
+    pvalue = $pvalue_marker
+)
+
+
+for cluster_id, peaks_index in marker_peaks.items():
+    print(f"Processing cluster {cluster_id} with {len(peaks_index)} peaks...")
+
+    # Convert pandas Index to list of strings
+    regions_list = peaks_index.tolist()
+
+    # Create network for this cluster
+    network = snap.tl.init_network_from_annotation(
+        regions = regions_list,
+        anno_file = 'anno.gff',
+        upstream = $upstream,
+        downstream = $downstream,
+        id_type = '$id_type',
+        coding_gene_only = $coding_gene_only
+    )
+
+    # Compute correlation scores for connected nodes of the network
+    snap.tl.add_cor_scores(
+        network,
+        gene_mat=rna,
+        peak_mat=atac,
+        select=None, # Will get available if requested by users
+        overwrite=False
+    )
+
+    # Regression analysis for nodes and their parents in the network
+    snap.tl.add_regr_scores(
+        network,
+        gene_mat=rna,
+        peak_mat=atac,
+        select=None, # Will get available if requested by users
+        method = '$method',
+        scale_X = $scale_X,
+        scale_Y = $scale_Y,
+        alpha = $alpha,
+        l1_ratio = $l1_ratio,
+        overwrite=False
+    )
+
+    # Add TF motif binding information
+    snap.tl.add_tf_binding(
+        network,
+        motifs = motifs,
+        genome_fasta = 'fasta.fa',
+        pvalue = $pvalue_tf
+    )
+
+    ## No good documentation, will get updated in next version if a tutorial is available for it
+    ## # link TF to gene
+    ## genetic_network = snap.tl.link_tf_to_gene(
+    ##     network
+    ## )
+
+    # Create plot for this cluster, written in the file type selected by the user
+    snap.pl.network_edge_stat(
+        network = network,
+        width = $width,
+        height = $height,
+        show = False,
+        interactive = False,
+        out_file = f'network_dir/plot_network_cluster_{cluster_id}.${plot_ext}',
+    )
+
+    ## No good documentation, will get updated in next version if a tutorial is available for it
+    ## snap.pl.network_edge_stat(
+    ##     network = genetic_network,
+    ##     width = $width,
+    ##     height = $height,
+    ##     show = False,
+    ##     interactive = False,
+    ##     out_file = f'network_dir/plot_genetic_network_cluster_{cluster_id}.png',
+    ## )
+    print(f"Completed cluster {cluster_id}")
+
+print("All clusters processed!")
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <param name="adata_rna" type="data" format="h5ad" label="Annotated data matrix containing RNA data"/>
+        <param name="adata_atac" type="data" format="h5ad" label="Annotated data matrix containing ATAC data"/>
+        <expand macro="param_groupby"/>
+        <param argument="pvalue_marker" type="float" value="0.01" label="P-value threshold to detect marker regions"/>
+        <expand macro="param_gene_anno"/>
+        <expand macro="param_genome_fasta"/>
+        <param name="upstream" type="integer" value="250000" min="0" label="Upstream extension to the transcription start site"/>
+        <param name="downstream" type="integer" value="250000" min="0" label="Downstream extension to the transcription start site"/>
+        <param name="id_type" type="select" label="ID type of the gene in the annotation file">
+            <option value="gene_name">Gene name</option>
+            <option value="gene_id">Gene ID</option>
+            <option value="transcript_id">Transcript ID</option>
+        </param>
+        <param name="coding_gene_only" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Retain only coding genes in the network"/>
+        <param name="method" type="select" label="Regression model">
+            <!-- <option value="elastic_net">elastic_net</option> has problem with sparse matrix-->
+            <option value="gb_tree">gb_tree</option>
+        </param>
+        <param name="scale_X" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Whether to scale the features"/>
+        <param name="scale_Y" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Whether to scale the response variable"/>
+        <param name="alpha" type="float" value="1.5" label="Constant that multiplies the penalty terms in ‘elastic_net’"/>
+        <param name="l1_ratio" type="float" min="0" max="1" value="0.5" label="L1 penalty ratio" help="If set to 0, L2 penalty is used, if set to 1, L1 penalty is used, and if set between 0 and 1, a combination of both penalties is used."/>
+        <expand macro="param_meme_table"/>
+        <param argument="pvalue_tf" type="float" value="0.01" label="P-value threshold to detect TF binding"/>
+        <expand macro="param_render_plot"/>
+        <expand macro="param_common_advanced"/>
+    </inputs>
+    <outputs>
+        <data name="hidden_output" format="txt" label="Log file" >
+            <filter>advanced_common['show_log']</filter>
+        </data>
+        <collection name="out_network" type="list" label="${tool.name} (${method}) on ${on_string} - Network">
+            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
+            <filter>out_file != 'svg' and out_file != 'html'</filter>
+        </collection>
+        <collection name="out_network_svg" type="list" label="${tool.name} (${method}) on ${on_string} - Network - svg">
+            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
+            <filter>out_file == 'svg'</filter>
+        </collection>
+        <collection name="out_network_html" type="list" label="${tool.name} (${method}) on ${on_string} - Network - html">
+            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
+            <filter>out_file == 'html'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- network analysis with the built-in (cached) GFF and FASTA from the data tables -->
+        <test expect_num_outputs="2">
+            <param name="adata_rna" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz"/>
+            <param name="adata_atac" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
+            <param name="groupby" value="cell_type"/>
+            <param name="pvalue_marker" value="0.1"/>
+            <conditional name="gff_file_condi">
+                <param name="gffSource" value="cached"/>
+                <param name="gff_pre_installed" value="hg38"/>
+            </conditional>
+            <conditional name="fasta_file_condi">
+                <param name="fastaSource" value="indexed"/>
+                <param name="fasta_pre_installed" value="hg38"/>
+            </conditional>
+            <param name="out_file" value="png"/>
+            <expand macro="test_param_render_plot"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.tl.marker_regions"/>
+                    <has_text_matching expression="snap.tl.init_network_from_annotation"/>
+                    <has_text_matching expression="snap.tl.add_cor_scores"/>
+                    <has_text_matching expression="snap.tl.add_regr_scores"/>
+                    <has_text_matching expression="add_tf_binding"/>
+                    <!-- <has_text_matching expression="snap.tl.link_tf_to_gene"/> -->
+                    <has_text_matching expression="snap.pl.network_edge_stat"/>
+                    <has_text_matching expression="groupby = 'cell_type'"/>
+                    <expand macro="test_render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output_collection name="out_network" type="list" count="8">
+                <element name="plot_network_cluster_CD14 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD16 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 TCM">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD8 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_NK">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_Treg">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_cDC">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- network analysis with the GFF and FASTA taken from the history -->
+        <test expect_num_outputs="2">
+            <param name="adata_rna" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz"/>
+            <param name="adata_atac" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
+            <param name="groupby" value="cell_type"/>
+            <param name="pvalue_marker" value="0.1"/>
+            <conditional name="gff_file_condi">
+                <param name="gffSource" value="history"/>
+                <param name="gff_history" location="https://zenodo.org/records/17512085/files/chr21.gff3.gz"/>
+            </conditional>
+            <conditional name="fasta_file_condi">
+                <param name="fastaSource" value="history"/>
+                <param name="fasta_history" location="https://zenodo.org/records/17512085/files/chr21.fasta.gz"/>
+            </conditional>
+            <param name="out_file" value="png"/>
+            <expand macro="test_param_render_plot"/>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.tl.marker_regions"/>
+                    <has_text_matching expression="snap.tl.init_network_from_annotation"/>
+                    <has_text_matching expression="snap.tl.add_cor_scores"/>
+                    <has_text_matching expression="snap.tl.add_regr_scores"/>
+                    <has_text_matching expression="add_tf_binding"/>
+                    <!-- <has_text_matching expression="snap.tl.link_tf_to_gene"/> -->
+                    <has_text_matching expression="snap.pl.network_edge_stat"/>
+                    <has_text_matching expression="groupby = 'cell_type'"/>
+                    <expand macro="test_render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output_collection name="out_network" type="list" count="8">
+                <element name="plot_network_cluster_CD14 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD16 Mono">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD4 TCM">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_CD8 Naive">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_NK">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_Treg">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+                <element name="plot_network_cluster_cDC">
+                    <assert_contents>
+                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
+                        <has_image_channels channels="4"/>
+                        <has_image_height height="450" delta="20"/>
+                        <has_image_width width="650" delta="20"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+Build CRE-gene network from gene annotations, using `SnapATAC2`
+===============================================================
+
+Link CREs to genes if they are close to genes’ promoter regions.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.init_network_from_annotation.html>`__
+
+
+Compute correlation scores for any two connected nodes in the network.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_cor_scores.html>`__
+
+
+Perform regression analysis for nodes and their parents in the network.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_regr_scores.html>`__
+
+
+Add TF motif binding information.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_tf_binding.html>`__
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Tue Nov 25 16:39:58 2025 +0000 @@ -0,0 +1,1 @@ +hg38 hg38 Human (hg38) ${__HERE__}/chr21_small.fasta.gz \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_sets.loc Tue Nov 25 16:39:58 2025 +0000 @@ -0,0 +1,1 @@ +hg38 hg38 hg38GFF ${__HERE__}/chr21.gff3.gz \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme.loc Tue Nov 25 16:39:58 2025 +0000 @@ -0,0 +1,1 @@ +cisbp snap.datasets.cis_bp(unique=True) ${__HERE__}/cisBP_human.meme.gz \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Tue Nov 25 16:39:58 2025 +0000 @@ -0,0 +1,17 @@ +#This file lists the locations and dbkeys of all the genome and transcriptome fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel4.5 apiMel4.5 Honeybee (Apis mellifera): apiMel4.5 /path/to/genome/apiMel4.5/apiMel4.5.fa +#hg38canon hg38 Human (Homo sapiens): hg38 Canonical /path/to/genome/hg38/hg38canon.fa +#hg38full hg38 Human (Homo sapiens): hg38 Full /path/to/genome/hg38/hg38full.fa +#hg38full.90 hg38 Human (Homo sapiens): hg38 Full Trans v90 /path/to/genome/hg38/hg38fulltrans.fa + +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg38 above. \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gene_sets.loc.sample Tue Nov 25 16:39:58 2025 +0000 @@ -0,0 +1,14 @@ +# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format. +# +# The gene_sets.loc file syntax is: +#<unique_build_id> <dbkey> <display_name> <path> +# +# Please ensure that the above fields are tab separated. +# +# In case you have TWO or MORE providers PER dbkey, the one mentioned +# first in the file, should have the "default" priority. +# +#Example: +# +#Homo_sapiens.GRCh38.90 hg38 GRCh38 (hg38) annotation from Ensembl, release 90 /depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf +#Homo_sapiens.GRCh37.87 hg19 GRCh37 (hg19) annotation from Ensembl, release 87 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/meme.loc.sample Tue Nov 25 16:39:58 2025 +0000 @@ -0,0 +1,13 @@ +# This is a sample file distributed with snapatac2 which enables the tool to perform motif enrichment analysis +# +# The meme.loc file syntax is: +#<unique_id> <display_name> <path> +# +# Please ensure that the above fields are tab separated. +# +# Currently the files should be downloaded manually +# +#Example: +# +#cisbp cis_bp(unique=True) /path/to/cisBP_human.meme.gz +#meuleman_2020 Meuleman_2020 /path/to/Meuleman_2020.meme.gz \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Genome FASTA files, listed in tool-data/all_fasta.loc -->
+    <table allow_duplicate_entries="False" comment_char="#" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- GFF gene annotation files per genome build, listed in tool-data/gene_sets.loc -->
+    <table allow_duplicate_entries="False" comment_char="#" name="gene_sets">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gene_sets.loc" />
+    </table>
+    <!-- MEME motif files, listed in tool-data/meme.loc -->
+    <table allow_duplicate_entries="False" comment_char="#" name="meme">
+        <columns>value, name, path</columns>
+        <file path="tool-data/meme.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Nov 25 16:39:58 2025 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Test table: FASTA entries read from test-data/all_fasta.loc -->
+    <table comment_char="#" name="all_fasta">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <!-- Test table: GFF entries read from test-data/gene_sets.loc -->
+    <table comment_char="#" name="gene_sets">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/gene_sets.loc" />
+    </table>
+    <!-- Test table: MEME motif entries read from test-data/meme.loc -->
+    <table comment_char="#" name="meme">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/meme.loc" />
+    </table>
+</tables>
\ No newline at end of file
