Mercurial > repos > iuc > snapatac2_network
diff network.xml @ 0:2c289e3b566a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/snapatac2 commit e0f59fae19e57f54ae0c351a16dd1805d12aba1d
| author | iuc |
|---|---|
| date | Tue, 25 Nov 2025 16:39:58 +0000 |
| parents | |
| children |
line wrap: on
line diff
<tool id="snapatac2_network" name="SnapATAC2 network" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>analysis</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <requirements>
        <expand macro="requirements"/>
    </requirements>
    <!--
        Stage all inputs under fixed names in the job working directory, then hand over
        to the shared command token, which is defined in macros.xml.
        The motif database and the genome FASTA are decompressed; the gene annotation is
        only symlinked. Plots are written to network_dir/ and collected from there.
    -->
    <command detect_errors="exit_code"><![CDATA[
        gunzip -c '$motifs.fields.path' > 'input.meme' &&
        #if $gff_file_condi.gffSource == 'cached':
            ln -s '$gff_file_condi.gff_pre_installed.fields.path' anno.gff &&
        #else:
            ln -s '$gff_file_condi.gff_history' anno.gff &&
        #end if
        #if $fasta_file_condi.fastaSource == 'indexed':
            zcat '$fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
        #else:
            zcat '$fasta_file_condi.fasta_history' > fasta.fa &&
        #end if
        mkdir -p network_dir &&
        cp '$adata_rna' 'anndata_rna.h5ad' &&
        cp '$adata_atac' 'anndata_atac.h5ad' &&
@CMD@
    ]]></command>
    <!--
        Python script rendered by Cheetah. For every group returned by marker_regions it
        builds a CRE-gene network, annotates it with correlation scores, regression scores
        and TF binding, and renders one edge-statistics plot per group into network_dir/.
    -->
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CONF_IMPORTS@
## import motifs
from snapatac2._snapatac2 import read_motifs, PyDNAMotif
## NOTE(review): the variable 'motifs' used by add_tf_binding below is presumably bound by
## the MEME import token expanded inside this branch - confirm in macros.xml. If so, any
## selection other than 'cisbp' leaves it undefined.
#if $motifs == 'cisbp':
@CONF_IMPORT_MEME@
#end if
rna = snap.read('anndata_rna.h5ad', backed = None)
atac = snap.read('anndata_atac.h5ad', backed = None)

marker_peaks = snap.tl.marker_regions(
    atac,
    groupby = '$groupby',
    pvalue = $pvalue_marker
)


for cluster_id, peaks_index in marker_peaks.items():
    print(f"Processing cluster {cluster_id} with {len(peaks_index)} peaks...")

    # Convert pandas Index to list of strings
    regions_list = peaks_index.tolist()

    # Create network for this cluster
    network = snap.tl.init_network_from_annotation(
        regions = regions_list,
        anno_file = 'anno.gff',
        upstream = $upstream,
        downstream = $downstream,
        id_type = '$id_type',
        coding_gene_only = $coding_gene_only
    )

    # add cor scores
    snap.tl.add_cor_scores(
        network,
        gene_mat=rna,
        peak_mat=atac,
        select=None, # Will get available if requested by users
        overwrite=False
    )
    # add regr scores

    snap.tl.add_regr_scores(
        network,
        gene_mat=rna,
        peak_mat=atac,
        select=None, # Will get available if requested by users
        method = '$method',
        scale_X = $scale_X,
        scale_Y = $scale_Y,
        alpha = $alpha,
        l1_ratio = $l1_ratio,
        overwrite=False
    )

    # add tf binding
    snap.tl.add_tf_binding(
        network,
        motifs = motifs,
        genome_fasta = 'fasta.fa',
        pvalue = $pvalue_tf
    )

    ## No good documentation, will get updated in next version if a tutorial is available for it
    ## # link TF to gene
    ## genetic_network = snap.tl.link_tf_to_gene(
    ##     network
    ## )

    # Create plot for this cluster
    ## The extension follows the selected output format so that the files discovered in
    ## network_dir/ match the png/svg/html output collection that is active for the job.
    snap.pl.network_edge_stat(
        network = network,
        width = $width,
        height = $height,
        show = False,
        interactive = False,
        out_file = f'network_dir/plot_network_cluster_{cluster_id}.$out_file',
    )

    ## No good documentation, will get updated in next version if a tutorial is available for it
    ## snap.pl.network_edge_stat(
    ##     network = genetic_network,
    ##     width = $width,
    ##     height = $height,
    ##     show = False,
    ##     interactive = False,
    ##     out_file = f'network_dir/plot_genetic_network_cluster_{cluster_id}.png',
    ## )
    print(f"Completed cluster {cluster_id}")

print("All clusters processed!")
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="adata_rna" type="data" format="h5ad" label="Annotated data matrix containing RNA data"/>
        <param name="adata_atac" type="data" format="h5ad" label="Annotated data matrix containing ATAC data"/>
        <expand macro="param_groupby"/>
        <param argument="pvalue_marker" type="float" value="0.01" label="P-value threshold to detect marker regions"/>
        <expand macro="param_gene_anno"/>
        <expand macro="param_genome_fasta"/>
        <param name="upstream" type="integer" value="250000" min="0" label="Upstream extension to the transcription start site"/>
        <param name="downstream" type="integer" value="250000" min="0" label="Downstream extension to the transcription start site"/>
        <param name="id_type" type="select" label="ID type of the gene in the annotation file">
            <option value="gene_name">Gene name</option>
            <option value="gene_id">Gene ID</option>
            <option value="transcript_id">Transcript ID</option>
        </param>
        <param name="coding_gene_only" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Retain only coding genes in the network"/>
        <param name="method" type="select" label="Regression model">
            <!-- <option value="elastic_net">elastic_net</option> has problem with sparse matrix-->
            <option value="gb_tree">gb_tree</option>
        </param>
        <param name="scale_X" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Whether to scale the features"/>
        <param name="scale_Y" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Whether to scale the response variable"/>
        <param name="alpha" type="float" value="1.5" label="Constant that multiplies the penalty terms in ‘elastic_net’"/>
        <param name="l1_ratio" type="float" min="0" max="1" value="0.5" label="L1 penalty ratio" help="If set to 0, L2 penalty is used, if set to 1, L1 penalty is used, and if set between 0 and 1, a combination of both penalties is used."/>
        <expand macro="param_meme_table"/>
        <param argument="pvalue_tf" type="float" value="0.01" label="P-value threshold to detect TF binding"/>
        <expand macro="param_render_plot"/>
        <expand macro="param_common_advanced"/>
    </inputs>
    <!--
        Exactly one of the three plot collections is produced per job, selected by the
        out_file parameter; all of them are discovered from network_dir/.
    -->
    <outputs>
        <data name="hidden_output" format="txt" label="Log file">
            <filter>advanced_common['show_log']</filter>
        </data>
        <collection name="out_network" type="list" label="${tool.name} (${method}) on ${on_string} - Network">
            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
            <filter>out_file != 'svg' and out_file != 'html'</filter>
        </collection>
        <collection name="out_network_svg" type="list" label="${tool.name} (${method}) on ${on_string} - Network - svg">
            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
            <filter>out_file == 'svg'</filter>
        </collection>
        <collection name="out_network_html" type="list" label="${tool.name} (${method}) on ${on_string} - Network - html">
            <discover_datasets pattern="__name_and_ext__" directory="network_dir/"/>
            <filter>out_file == 'html'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- network: gene annotation and genome FASTA taken from the cached data tables -->
        <test expect_num_outputs="2">
            <param name="adata_rna" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz"/>
            <param name="adata_atac" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
            <param name="groupby" value="cell_type"/>
            <param name="pvalue_marker" value="0.1"/>
            <conditional name="gff_file_condi">
                <param name="gffSource" value="cached"/>
                <param name="gff_pre_installed" value="hg38"/>
            </conditional>
            <conditional name="fasta_file_condi">
                <param name="fastaSource" value="indexed"/>
                <param name="fasta_pre_installed" value="hg38"/>
            </conditional>
            <param name="out_file" value="png"/>
            <expand macro="test_param_render_plot"/>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.marker_regions"/>
                    <has_text_matching expression="snap.tl.init_network_from_annotation"/>
                    <has_text_matching expression="snap.tl.add_cor_scores"/>
                    <has_text_matching expression="snap.tl.add_regr_scores"/>
                    <has_text_matching expression="add_tf_binding"/>
                    <!-- <has_text_matching expression="snap.tl.link_tf_to_gene"/> -->
                    <has_text_matching expression="snap.pl.network_edge_stat"/>
                    <has_text_matching expression="groupby = 'cell_type'"/>
                    <expand macro="test_render_plot_matching_text"/>
                </assert_contents>
            </output>
            <output_collection name="out_network" type="list" count="8">
                <element name="plot_network_cluster_CD14 Mono">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD16 Mono">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD4 Naive">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD4 TCM">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD8 Naive">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_NK">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_Treg">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_cDC">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- network: gene annotation and genome FASTA supplied from the history -->
        <test expect_num_outputs="2">
            <param name="adata_rna" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz"/>
            <param name="adata_atac" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
            <param name="groupby" value="cell_type"/>
            <param name="pvalue_marker" value="0.1"/>
            <conditional name="gff_file_condi">
                <param name="gffSource" value="history"/>
                <param name="gff_history" location="https://zenodo.org/records/17512085/files/chr21.gff3.gz"/>
            </conditional>
            <conditional name="fasta_file_condi">
                <param name="fastaSource" value="history"/>
                <param name="fasta_history" location="https://zenodo.org/records/17512085/files/chr21.fasta.gz"/>
            </conditional>
            <param name="out_file" value="png"/>
            <expand macro="test_param_render_plot"/>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.marker_regions"/>
                    <has_text_matching expression="snap.tl.init_network_from_annotation"/>
                    <has_text_matching expression="snap.tl.add_cor_scores"/>
                    <has_text_matching expression="snap.tl.add_regr_scores"/>
                    <has_text_matching expression="add_tf_binding"/>
                    <!-- <has_text_matching expression="snap.tl.link_tf_to_gene"/> -->
                    <has_text_matching expression="snap.pl.network_edge_stat"/>
                    <has_text_matching expression="groupby = 'cell_type'"/>
                    <expand macro="test_render_plot_matching_text"/>
                </assert_contents>
            </output>
            <output_collection name="out_network" type="list" count="8">
                <element name="plot_network_cluster_CD14 Mono">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD16 Mono">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD4 Naive">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD4 TCM">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_CD8 Naive">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_NK">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_Treg">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
                <element name="plot_network_cluster_cDC">
                    <assert_contents>
                        <has_image_center_of_mass center_of_mass="325,225" eps="20"/>
                        <has_image_channels channels="4"/>
                        <has_image_height height="450" delta="20"/>
                        <has_image_width width="650" delta="20"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Build CRE-gene network from gene annotations, using `SnapATAC2`
===============================================================

Link CREs to genes if they are close to genes’ promoter regions.

More details on the `SnapATAC2 documentation
<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.init_network_from_annotation.html>`__


Compute correlation scores for any two connected nodes in the network.

More details on the `SnapATAC2 documentation
<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_cor_scores.html>`__


Perform regression analysis for nodes and their parents in the network.

More details on the `SnapATAC2 documentation
<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_regr_scores.html>`__


Add TF motif binding information.

More details on the `SnapATAC2 documentation
<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.add_tf_binding.html>`__

    ]]></help>
    <expand macro="citations"/>
</tool>
