Mercurial > repos > iuc > snapatac2_preprocessing
comparison preprocessing.xml @ 0:b2844c6efb72 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snapatac2 commit df9c285dddde7d901823c608c8d7dab971224b5b
| author | iuc |
|---|---|
| date | Fri, 05 Jul 2024 11:05:36 +0000 |
| parents | |
| children | e3b85e48487b |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b2844c6efb72 |
|---|---|
| 1 <tool id="snapatac2_preprocessing" name="SnapATAC2 Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>and integration</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <expand macro="requirements"/> | |
| 8 </requirements> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' | |
| 11 @PREP_ADATA@ | |
| 12 #end if | |
| 13 @CMD@ | |
| 14 ]]></command> | |
| 15 <configfiles> | |
| 16 <configfile name="script_file"><![CDATA[ | |
| 17 @CMD_imports@ | |
| 18 | |
| 19 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' | |
| 20 @CMD_read_inputs@ | |
| 21 #end if | |
| 22 | |
| 23 #if $method.method == 'pp.make_fragment_file' | |
| 24 sa.pp.make_fragment_file( | |
| 25 bam_file = '$method.bam_file', | |
| 26 is_paired = $method.is_paired, | |
| 27 #if $method.barcode.extract_type == 'from_tag' | |
| 28 #if $method.barcode.barcode_tag != '' | |
| 29 barcode_tag = '$method.barcode.barcode_tag', | |
| 30 #end if | |
| 31 #elif $method.barcode.extract_type == 'from_read_names' | |
| 32 #if $method.barcode.barcode_regex != '' | |
| 33 barcode_regex = '$method.barcode.barcode_regex', | |
| 34 #end if | |
| 35 #end if | |
| 36 #if $method.umi_tag != '' | |
| 37 umi_tag = '$method.umi_tag', | |
| 38 #end if | |
| 39 #if $method.umi_regex != '' | |
| 40 umi_regex = '$method.umi_regex', | |
| 41 #end if | |
| 42 shift_right = $method.shift_right, | |
| 43 shift_left = $method.shift_left, | |
| 44 min_mapq = $method.min_mapq, | |
| 45 chunk_size = $method.chunk_size, | |
| 46 compression = 'gzip', | |
| 47 output_file = '$fragments_out', | |
| 48 tempdir = "." | |
| 49 ) | |
| 50 | |
| 51 #else if $method.method == 'pp.import_data' | |
| 52 import csv | |
| 53 with open('$method.chrom_sizes') as f: | |
| 54 chr_sizes = {x[0]:int(x[1]) for x in csv.reader(f, delimiter='\t')} | |
| 55 | |
| 56 sa.pp.import_data( | |
| 57 fragment_file = '$method.fragment_file', | |
| 58 chrom_sizes = chr_sizes, | |
| 59 min_num_fragments = $method.min_num_fragments, | |
| 60 sorted_by_barcode = $method.sorted_by_barcode, | |
| 61 #if str($method.whitelist) != 'None' | |
| 62 whitelist = '$method.whitelist', | |
| 63 #end if | |
| 64 shift_left = $method.shift_left, | |
| 65 shift_right = $method.shift_right, | |
| 66 #set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')]) | |
| 67 chrM = $chr_mt, | |
| 68 chunk_size = $method.chunk_size, | |
| 69 file = 'anndata.h5ad', | |
| 70 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 71 ) | |
| 72 | |
| 73 #else if $method.method == 'pp.add_tile_matrix' | |
| 74 sa.pp.add_tile_matrix( | |
| 75 adata, | |
| 76 bin_size = $method.bin_size, | |
| 77 chunk_size = $method.chunk_size, | |
| 78 #if $method.exclude_chroms != '' | |
| 79 #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')]) | |
| 80 exclude_chroms = $excl_chroms, | |
| 81 #end if | |
| 82 #if $method.min_frag_size | |
| 83 min_frag_size = $method.min_frag_size, | |
| 84 #end if | |
| 85 #if $method.max_frag_size | |
| 86 max_frag_size = $method.max_frag_size, | |
| 87 #end if | |
| 88 ##counting_strategy = '$method.counting_strategy', | |
| 89 count_frag_as_reads = $method.count_frag_as_reads, | |
| 90 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 91 ) | |
| 92 | |
| 93 #else if $method.method == 'pp.make_gene_matrix' | |
| 94 adata = sa.pp.make_gene_matrix( | |
| 95 adata, | |
| 96 gene_anno = '$method.gene_anno', | |
| 97 chunk_size = $method.chunk_size, | |
| 98 use_x = $method.use_x, | |
| 99 id_type = '$method.id_type', | |
| 100 transcript_name_key = '$method.transcript_name_key', | |
| 101 transcript_id_key = '$method.transcript_id_key', | |
| 102 gene_name_key = '$method.gene_name_key', | |
| 103 gene_id_key = '$method.gene_id_key', | |
| 104 #if $method.min_frag_size | |
| 105 min_frag_size = $method.min_frag_size, | |
| 106 #end if | |
| 107 #if $method.max_frag_size | |
| 108 max_frag_size = $method.max_frag_size, | |
| 109 #end if | |
| 110 ##counting_strategy = '$method.counting_strategy' | |
| 111 count_frag_as_reads = $method.count_frag_as_reads | |
| 112 ) | |
| 113 | |
| 114 #else if $method.method == 'pp.filter_cells' | |
| 115 sa.pp.filter_cells( | |
| 116 adata, | |
| 117 min_counts = $method.min_counts, | |
| 118 min_tsse = $method.min_tsse, | |
| 119 #if $method.max_counts | |
| 120 max_counts = $method.max_counts, | |
| 121 #end if | |
| 122 #if $method.max_tsse | |
| 123 max_tsse = $method.max_tsse, | |
| 124 #end if | |
| 125 inplace = True, | |
| 126 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 127 ) | |
| 128 | |
| 129 #else if $method.method == 'pp.select_features' | |
| 130 sa.pp.select_features( | |
| 131 adata, | |
| 132 n_features = $method.n_features, | |
| 133 filter_lower_quantile = $method.filter_lower_quantile, | |
| 134 filter_upper_quantile = $method.filter_upper_quantile, | |
| 135 #if str($method.whitelist) != 'None' | |
| 136 whitelist = '$method.whitelist', | |
| 137 #end if | |
| 138 #if str($method.blacklist) != 'None' | |
| 139 blacklist = '$method.blacklist', | |
| 140 #end if | |
| 141 max_iter = $method.max_iter, | |
| 142 inplace = True, | |
| 143 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 144 ) | |
| 145 | |
| 146 #else if $method.method == 'pp.scrublet' | |
| 147 sa.pp.scrublet( | |
| 148 adata, | |
| 149 #if $method.features | |
| 150 features = '$method.features', | |
| 151 #end if | |
| 152 n_comps = $method.n_comps, | |
| 153 sim_doublet_ratio = $method.sim_doublet_ratio, | |
| 154 expected_doublet_rate = $method.expected_doublet_rate, | |
| 155 #if $method.n_neighbors | |
| 156 n_neighbors = $method.n_neighbors, | |
| 157 #end if | |
| 158 use_approx_neighbors = $method.use_approx_neighbors, | |
| 159 random_state = $method.random_state, | |
| 160 inplace = True, | |
| 161 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 162 ) | |
| 163 | |
| 164 #else if $method.method == 'pp.filter_doublets' | |
| 165 sa.pp.filter_doublets( | |
| 166 adata, | |
| 167 #if $method.probability_threshold | |
| 168 probability_threshold = $method.probability_threshold, | |
| 169 #end if | |
| 170 #if $method.score_threshold | |
| 171 score_threshold = $method.score_threshold, | |
| 172 #end if | |
| 173 inplace = True, | |
| 174 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 175 ) | |
| 176 | |
| 177 #else if $method.method == 'pp.mnc_correct' | |
| 178 sa.pp.mnc_correct( | |
| 179 adata, | |
| 180 batch = '$method.batch', | |
| 181 n_neighbors = $method.n_neighbors, | |
| 182 n_clusters = $method.n_clusters, | |
| 183 n_iter = $method.n_iter, | |
| 184 @CMD_params_data_integration@ | |
| 185 inplace = True, | |
| 186 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 187 ) | |
| 188 | |
| 189 #else if $method.method == 'pp.harmony' | |
| 190 sa.pp.harmony( | |
| 191 adata, | |
| 192 batch = '$method.batch', | |
| 193 @CMD_params_data_integration@ | |
| 194 inplace = True | |
| 195 ) | |
| 196 | |
| 197 #else if $method.method == 'pp.scanorama_integrate' | |
| 198 sa.pp.scanorama_integrate( | |
| 199 adata, | |
| 200 batch = '$method.batch', | |
| 201 n_neighbors = $method.n_neighbors, | |
| 202 @CMD_params_data_integration@ | |
| 203 inplace = True | |
| 204 ) | |
| 205 | |
| 206 #else if $method.method == 'metrics.frag_size_distr' | |
| 207 sa.metrics.frag_size_distr( | |
| 208 adata, | |
| 209 max_recorded_size = $method.max_recorded_size, | |
| 210 add_key = '$method.add_key', | |
| 211 inplace = True, | |
| 212 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 213 ) | |
| 214 | |
| 215 #else if $method.method == 'metrics.tsse' | |
| 216 sa.metrics.tsse( | |
| 217 adata, | |
| 218 gene_anno = '$method.gene_anno', | |
| 219 inplace = True, | |
| 220 n_jobs = int(os.getenv("GALAXY_SLOTS", 4)) | |
| 221 ) | |
| 222 #end if | |
| 223 | |
| 224 #if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data' | |
| 225 @CMD_anndata_write_outputs@ | |
| 226 #end if | |
| 227 ]]></configfile> | |
| 228 </configfiles> | |
| 229 <inputs> | |
| 230 <conditional name="method"> | |
| 231 <param name="method" type="select" label="Method used for preprocessing"> | |
| 232 <option value="pp.make_fragment_file">Convert a BAM file to a fragment file, using 'pp.make_fragment_file'</option> | |
| 233 <option value="pp.import_data">Import data fragment files and compute basic QC metrics, using 'pp.import_data'</option> | |
| 234 <option value="pp.add_tile_matrix">Generate cell by bin count matrix, using 'pp.add_tile_matrix'</option> | |
| 235 <option value="pp.make_gene_matrix">Generate cell by gene activity matrix, using 'pp.make_gene_matrix'</option> | |
| 236 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option> | |
| 237 <option value="pp.select_features">Perform feature selection, using 'pp.select_features'</option> | |
| 238 <option value="pp.scrublet">Compute probability of being a doublet using the scrublet algorithm, using 'pp.scrublet'</option> | |
| 239 <option value="pp.filter_doublets">Remove doublets according to the doublet probability or doublet score, using 'pp.filter_doublets'</option> | |
| 240 <option value="pp.mnc_correct">A modified MNN-Correct algorithm based on cluster centroid, using 'pp.mnc_correct'</option> | |
| 241 <option value="pp.harmony">Use harmonypy to integrate different experiments, using 'pp.harmony'</option> | |
| 242 <option value="pp.scanorama_integrate">Use Scanorama [Hie19] to integrate different experiments, using 'pp.scanorama_integrate'</option> | |
| 243 <option value="metrics.frag_size_distr">Compute the fragment size distribution of the dataset, using 'metrics.frag_size_distr'</option> | |
| 244 <option value="metrics.tsse">Compute the TSS enrichment score (TSSe) for each cell, using 'metrics.tsse'</option> | |
| 245 </param> | |
| 246 <when value="pp.make_fragment_file"> | |
| 247 <param argument="bam_file" type="data" format="bam" label="File name of the BAM file"/> | |
| 248 <param argument="is_paired" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Indicate whether the BAM file contains paired-end reads"/> | |
| 249 <conditional name="barcode"> | |
| 250 <param name="extract_type" type="select" label="How to extract barcodes from BAM records?"> | |
| 251 <option value="from_tag">From TAG fields</option> | |
| 252 <option value="from_read_names">From read names using regular expressions</option> | |
| 253 </param> | |
| 254 <when value="from_tag"> | |
| 255 <param argument="barcode_tag" type="text" value="CB" optional="true" label="Extract barcodes from TAG fields of BAM records"/> | |
| 256 </when> | |
| 257 <when value="from_read_names"> | |
| 258 <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="`(..:..:..:..):\w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT`"/> | |
| 259 </when> | |
| 260 </conditional> | |
| 261 <param argument="umi_tag" type="text" value="" optional="true" label="Extract UMI from TAG fields of BAM records"/> | |
| 262 <param argument="umi_regex" type="text" value="" optional="true" label="Extract UMI from read names of BAM records using regular expressions"/> | |
| 263 <expand macro="param_shift"/> | |
| 264 <param argument="min_mapq" type="integer" min="0" value="30" label="Filter the reads based on MAPQ"/> | |
| 265 <expand macro="param_chunk_size" size="50000000"/> | |
| 266 </when> | |
| 267 <when value="pp.import_data"> | |
| 268 <param argument="fragment_file" type="data" format="interval" label="Fragment file, optionally compressed with gzip or zstd"/> | |
| 269 <param argument="chrom_sizes" type="data" format="tabular" label="A tabular file containing chromosome names and sizes"/> | |
| 270 <param argument="min_num_fragments" type="integer" value="200" label="Number of unique fragments threshold used to filter cells"/> | |
| 271 <param argument="sorted_by_barcode" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether the fragment file has been sorted by cell barcodes"/> | |
| 272 <param argument="whitelist" type="data" format="txt" optional="True" label="Whitelist file with a list of barcodes" help="Each line must contain a valid barcode. When provided, only barcodes in the whitelist will be retained."/> | |
| 273 <param argument="chrM" type="text" value="chrM, M" label="A list of chromosome names that are considered mitochondrial DNA"> | |
| 274 <expand macro="sanitize_query"/> | |
| 275 </param> | |
| 276 <param argument="shift_left" type="integer" value="0" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/> | |
| 277 <param argument="shift_right" type="integer" value="0" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/> | |
| 278 <expand macro="param_chunk_size" size="2000"/> | |
| 279 </when> | |
| 280 <when value="pp.add_tile_matrix"> | |
| 281 <expand macro="inputs_anndata"/> | |
| 282 <param argument="bin_size" type="integer" value="500" label="The size of consecutive genomic regions used to record the counts"/> | |
| 283 <expand macro="param_chunk_size" size="500"/> | |
| 284 <param argument="exclude_chroms" type="text" value="chrM, chrY, M, Y" optional="true" label="A list of chromosomes to exclude"> | |
| 285 <expand macro="sanitize_query"/> | |
| 286 </param> | |
| 287 <expand macro="min_max_frag_size"/> | |
| 288 <!--expand macro="param_counting_strategy"/--> | |
| 289 <param argument="count_frag_as_reads" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to count fragments as reads"/> | |
| 290 </when> | |
| 291 <when value="pp.make_gene_matrix"> | |
| 292 <expand macro="inputs_anndata"/> | |
| 293 <param argument="gene_anno" type="data" format="gtf,gff3" label="GTF/GFF file containing the gene annotation"/> | |
| 294 <expand macro="param_chunk_size" size="500"/> | |
| 295 <param argument="use_x" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If True, use the matrix stored in .X as raw counts"/> | |
| 296 <param argument="id_type" type="select" label="Id type, 'gene' or 'transcript'"> | |
| 297 <option value="gene" selected="true">gene</option> | |
| 298 <option value="transcript">transcript</option> | |
| 299 </param> | |
| 300 <param argument="transcript_name_key" type="text" value="transcript_name" label="The key of the transcript name in the gene annotation file"/> | |
| 301 <param argument="transcript_id_key" type="text" value="transcript_id" label="The key of the transcript id in the gene annotation file"/> | |
| 302 <param argument="gene_name_key" type="text" value="gene_name" label="The key of the gene name in the gene annotation file"/> | |
| 303 <param argument="gene_id_key" type="text" value="gene_id" label="The key of the gene id in the gene annotation file"/> | |
| 304 <expand macro="min_max_frag_size"/> | |
| 305 <!--expand macro="param_counting_strategy"/--> | |
| 306 <param argument="count_frag_as_reads" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to count fragments as reads"/> | |
| 307 </when> | |
| 308 <when value="pp.filter_cells"> | |
| 309 <expand macro="inputs_anndata"/> | |
| 310 <param argument="min_counts" type="integer" value="1000" label="Minimum number of counts required for a cell to pass filtering"/> | |
| 311 <param argument="min_tsse" type="float" value="5.0" label="Minimum TSS enrichment score required for a cell to pass filtering"/> | |
| 312 <param argument="max_counts" type="integer" value="" optional="true" label="Maximum number of counts required for a cell to pass filtering"/> | |
| 313 <param argument="max_tsse" type="float" value="" optional="true" label="Maximum TSS enrichment score allowed for a cell to pass filtering"/> | |
| 314 </when> | |
| 315 <when value="pp.select_features"> | |
| 316 <expand macro="inputs_anndata"/> | |
| 317 <param argument="n_features" type="integer" min="1" value="500000" label="Number of features to keep"/> | |
| 318 <param argument="filter_lower_quantile" type="float" min="0" value="0.005" label="Lower quantile of the feature count distribution to filter out"/> | |
| 319 <param argument="filter_upper_quantile" type="float" min="0" value="0.005" label="Upper quantile of the feature count distribution to filter out"/> | |
| 320 <param argument="whitelist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide whitelist regions"/> | |
| 321 <param argument="blacklist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide blacklist regions"/> | |
| 322 <param argument="max_iter" type="integer" value="1" label="If greater than 1, this function will perform iterative clustering and feature selection"/> | |
| 323 </when> | |
| 324 <when value="pp.scrublet"> | |
| 325 <expand macro="inputs_anndata"/> | |
| 326 <param argument="features" type="text" value="" optional="true" label="Boolean index mask, where True means that the feature is kept, and False means the feature is removed."/> | |
| 327 <param argument="n_comps" type="integer" value="15" label="Number of components" help="15 is usually sufficient. The algorithm is not sensitive to this parameter"/> | |
| 328 <param argument="sim_doublet_ratio" type="float" value="2.0" label="Number of doublets to simulate relative to the number of observed cells"/> | |
| 329 <param argument="expected_doublet_rate" type="float" value="0.1" label="Expected doublet rate"/> | |
| 330 <param argument="n_neighbors" type="integer" value="" optional="true" label="Number of neighbors used to construct the KNN graph of observed cells and simulated doublets"/> | |
| 331 <param argument="use_approx_neighbors" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use approximate search"/> | |
| 332 <param argument="random_state" type="integer" value="0" label="Random state"/> | |
| 333 </when> | |
| 334 <when value="pp.filter_doublets"> | |
| 335 <expand macro="inputs_anndata"/> | |
| 336 <param argument="probability_threshold" type="float" value="0.5" label="Threshold for doublet probability"/> | |
| 337 <param argument="score_threshold" type="float" value="" optional="true" label="Threshold for doublet score"/> | |
| 338 </when> | |
| 339 <when value="pp.mnc_correct"> | |
| 340 <expand macro="inputs_anndata"/> | |
| 341 <param argument="batch" type="text" value="batch" label="Batch labels for cells"> | |
| 342 <expand macro="sanitize_query"/> | |
| 343 </param> | |
| 344 <param argument="n_neighbors" type="integer" value="5" label="Number of mutual nearest neighbors"/> | |
| 345 <param argument="n_clusters" type="integer" value="40" label="Number of clusters"/> | |
| 346 <param argument="n_iter" type="integer" value="1" label="Number of iterations"/> | |
| 347 <expand macro="params_data_integration"/> | |
| 348 </when> | |
| 349 <when value="pp.harmony"> | |
| 350 <expand macro="inputs_anndata"/> | |
| 351 <param argument="batch" type="text" value="batch" label="Batch labels for cells"> | |
| 352 <expand macro="sanitize_query"/> | |
| 353 </param> | |
| 354 <expand macro="params_data_integration"/> | |
| 355 </when> | |
| 356 <when value="pp.scanorama_integrate"> | |
| 357 <expand macro="inputs_anndata"/> | |
| 358 <param argument="batch" type="text" value="batch" label="Batch labels for cells"> | |
| 359 <expand macro="sanitize_query"/> | |
| 360 </param> | |
| 361 <param argument="n_neighbors" type="integer" value="20" label="Number of mutual nearest neighbors"/> | |
| 362 <expand macro="params_data_integration"/> | |
| 363 </when> | |
| 364 <when value="metrics.frag_size_distr"> | |
| 365 <!-- TODO move this to plotting --> | |
| 366 <expand macro="inputs_anndata"/> | |
| 367 <param argument="max_recorded_size" type="integer" min="1" value="1000" label="The maximum fragment size to record in the result"/> | |
| 368 <param argument="add_key" type="text" value="frag_size_distr" label="Key used to store the result in `adata.uns`"/> | |
| 369 </when> | |
| 370 <when value="metrics.tsse"> | |
| 371 <!-- TODO move this to plotting --> | |
| 372 <expand macro="inputs_anndata"/> | |
| 373 <param argument="gene_anno" type="data" format="gtf,gff3" label="GTF/GFF file containing the gene annotation"/> | |
| 374 </when> | |
| 375 </conditional> | |
| 376 <expand macro="inputs_common_advanced"/> | |
| 377 </inputs> | |
| 378 <outputs> | |
| 379 <data name="fragments_out" format="interval" label="${tool.name} (${method.method}) on ${on_string}: Fragment file"> | |
| 380 <filter>method['method'] == 'pp.make_fragment_file'</filter> | |
| 381 </data> | |
| 382 <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"> | |
| 383 <filter>method['method'] != 'pp.make_fragment_file'</filter> | |
| 384 </data> | |
| 385 <data name="hidden_output" format="txt" label="Log file"> | |
| 386 <filter>advanced_common['show_log']</filter> | |
| 387 </data> | |
| 388 </outputs> | |
| 389 <tests> | |
| 390 <test expect_num_outputs="1"> | |
| 391 <!-- pp.make_fragment_file --> | |
| 392 <conditional name="method"> | |
| 393 <param name="method" value="pp.make_fragment_file"/> | |
| 394 <param name="bam_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21_subsample.bam"/> | |
| 395 <param name="is_paired" value="true"/> | |
| 396 <conditional name="barcode"> | |
| 397 <param name="extract_type" value="from_tag"/> | |
| 398 <param name="barcode_tag" value="CB"/> | |
| 399 </conditional> | |
| 400 <param name="shift_left" value="4"/> | |
| 401 <param name="shift_right" value="-5"/> | |
| 402 <param name="min_mapq" value="10"/> | |
| 403 <param name="chunk_size" value="50000000"/> | |
| 404 </conditional> | |
| 405 <output name="fragments_out" location="https://zenodo.org/records/11260316/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/> | |
| 406 </test> | |
| 407 <test expect_num_outputs="2"> | |
| 408 <!-- pp.import_data --> | |
| 409 <conditional name="method"> | |
| 410 <param name="method" value="pp.import_data"/> | |
| 411 <param name="fragment_file" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.tsv.gz"/> | |
| 412 <param name="chrom_sizes" location="https://zenodo.org/records/11260316/files/chr21_size.tabular"/> | |
| 413 <param name="min_num_fragments" value="1"/> | |
| 414 <param name="sorted_by_barcode" value="False"/> | |
| 415 <param name="shift_left" value="0"/> | |
| 416 <param name="chrM" value="chrM, M"/> | |
| 417 <param name="shift_right" value="0"/> | |
| 418 <param name="chunk_size" value="1000"/> | |
| 419 </conditional> | |
| 420 <section name="advanced_common"> | |
| 421 <param name="show_log" value="true"/> | |
| 422 </section> | |
| 423 <output name="hidden_output"> | |
| 424 <assert_contents> | |
| 425 <has_text_matching expression="sa.pp.import_data"/> | |
| 426 <has_text_matching expression="min_num_fragments = 1"/> | |
| 427 <has_text_matching expression="sorted_by_barcode = False"/> | |
| 428 <has_text_matching expression="shift_left = 0"/> | |
| 429 <has_text_matching expression="chrM = \['chrM', 'M'\]"/> | |
| 430 <has_text_matching expression="shift_right = 0"/> | |
| 431 <has_text_matching expression="chunk_size = 1000"/> | |
| 432 </assert_contents> | |
| 433 </output> | |
| 434 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/> | |
| 435 </test> | |
| 436 <test expect_num_outputs="2"> | |
| 437 <!-- pp.make_gene_matrix --> | |
| 438 <conditional name="method"> | |
| 439 <param name="method" value="pp.make_gene_matrix"/> | |
| 440 <param name="adata" location="https://zenodo.org/records/11260316/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/> | |
| 441 <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/> | |
| 442 <param name="chunk_size" value="500"/> | |
| 443 <param name="use_x" value="False"/> | |
| 444 <param name="id_type" value="gene"/> | |
| 445 <param name="transcript_name_key" value="transcript_name"/> | |
| 446 <param name="transcript_id_key" value="transcript_id"/> | |
| 447 <param name="gene_name_key" value="gene_name"/> | |
| 448 <param name="gene_id_key" value="gene_id"/> | |
| 449 <param name="count_frag_as_reads" value="True"/> | |
| 450 </conditional> | |
| 451 <section name="advanced_common"> | |
| 452 <param name="show_log" value="true" /> | |
| 453 </section> | |
| 454 <output name="hidden_output"> | |
| 455 <assert_contents> | |
| 456 <has_text_matching expression="sa.pp.make_gene_matrix"/> | |
| 457 <has_text_matching expression="chunk_size = 500"/> | |
| 458 <has_text_matching expression="use_x = False"/> | |
| 459 <has_text_matching expression="id_type = 'gene'"/> | |
| 460 <has_text_matching expression="transcript_name_key = 'transcript_name'"/> | |
| 461 <has_text_matching expression="transcript_id_key = 'transcript_id'"/> | |
| 462 <has_text_matching expression="gene_name_key = 'gene_name'"/> | |
| 463 <has_text_matching expression="gene_id_key = 'gene_id'"/> | |
| 464 <has_text_matching expression="count_frag_as_reads = True"/> | |
| 465 </assert_contents> | |
| 466 </output> | |
| 467 <output name="anndata_out" location="https://zenodo.org/records/12548681/files/pp.make_gene_matrix.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> | |
| 468 </test> | |
| 469 <test expect_num_outputs="2"> | |
| 470 <!-- metrics.tsse --> | |
| 471 <conditional name="method"> | |
| 472 <param name="method" value="metrics.tsse"/> | |
| 473 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/> | |
| 474 <param name="gene_anno" location="https://zenodo.org/records/11260316/files/chr21.gff3.gz"/> | |
| 475 </conditional> | |
| 476 <section name="advanced_common"> | |
| 477 <param name="show_log" value="true" /> | |
| 478 </section> | |
| 479 <output name="hidden_output"> | |
| 480 <assert_contents> | |
| 481 <has_text_matching expression="sa.metrics.tsse"/> | |
| 482 </assert_contents> | |
| 483 </output> | |
| 484 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> | |
| 485 </test> | |
| 486 <test expect_num_outputs="2"> | |
| 487 <!-- pp.filter_cells --> | |
| 488 <conditional name="method"> | |
| 489 <param name="method" value="pp.filter_cells"/> | |
| 490 <param name="adata" location="https://zenodo.org/records/11260316/files/metrics.tsse.pbmc_500_chr21.h5ad"/> | |
| 491 <param name="min_counts" value="200"/> | |
| 492 <param name="min_tsse" value="5"/> | |
| 493 <param name="max_counts" value="10000"/> | |
| 494 </conditional> | |
| 495 <section name="advanced_common"> | |
| 496 <param name="show_log" value="true" /> | |
| 497 </section> | |
| 498 <output name="hidden_output"> | |
| 499 <assert_contents> | |
| 500 <has_text_matching expression="sa.pp.filter_cells"/> | |
| 501 <has_text_matching expression="min_counts = 200"/> | |
| 502 <has_text_matching expression="min_tsse = 5"/> | |
| 503 <has_text_matching expression="max_counts = 10000"/> | |
| 504 </assert_contents> | |
| 505 </output> | |
| 506 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> | |
| 507 </test> | |
| 508 <test expect_num_outputs="2"> | |
| 509 <!-- pp.add_tile_matrix --> | |
| 510 <conditional name="method"> | |
| 511 <param name="method" value="pp.add_tile_matrix"/> | |
| 512 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.filter_cells.pbmc_500_chr21.h5ad"/> | |
| 513 <param name="bin_size" value="5000"/> | |
| 514 <param name="chunk_size" value="500"/> | |
| 515 <param name="exclude_chroms" value="chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr22, chrX, chrY"/> | |
| 516 <param name="count_frag_as_reads" value="True"/> | |
| 517 </conditional> | |
| 518 <section name="advanced_common"> | |
| 519 <param name="show_log" value="true" /> | |
| 520 </section> | |
| 521 <output name="hidden_output"> | |
| 522 <assert_contents> | |
| 523 <has_text_matching expression="sa.pp.add_tile_matrix"/> | |
| 524 <has_text_matching expression="bin_size = 5000"/> | |
| 525 <has_text_matching expression="chunk_size = 500"/> | |
| 526 <has_text_matching expression="exclude_chroms = \['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr22', 'chrX', 'chrY'\]"/> | |
| 527 <has_text_matching expression="count_frag_as_reads = True"/> | |
| 528 </assert_contents> | |
| 529 </output> | |
| 530 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> | |
| 531 </test> | |
| 532 <test expect_num_outputs="2"> | |
| 533 <!-- pp.select_features --> | |
| 534 <conditional name="method"> | |
| 535 <param name="method" value="pp.select_features"/> | |
| 536 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.add_tile_matrix.pbmc_500_chr21.h5ad"/> | |
| 537 <param name="n_features" value="15000"/> | |
| 538 </conditional> | |
| 539 <section name="advanced_common"> | |
| 540 <param name="show_log" value="true" /> | |
| 541 </section> | |
| 542 <output name="hidden_output"> | |
| 543 <assert_contents> | |
| 544 <has_text_matching expression="sa.pp.select_features"/> | |
| 545 <has_text_matching expression="n_features = 15000"/> | |
| 546 </assert_contents> | |
| 547 </output> | |
| 548 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> | |
| 549 </test> | |
| 550 <test expect_num_outputs="2"> | |
| 551 <!-- pp.scrublet --> | |
| 552 <conditional name="method"> | |
| 553 <param name="method" value="pp.scrublet"/> | |
| 554 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> | |
| 555 <param name="n_comps" value="15"/> | |
| 556 <param name="sim_doublet_ratio" value="2.0"/> | |
| 557 <param name="expected_doublet_rate" value="0.1"/> | |
| 558 <param name="random_state" value="0"/> | |
| 559 </conditional> | |
| 560 <section name="advanced_common"> | |
| 561 <param name="show_log" value="true" /> | |
| 562 </section> | |
| 563 <output name="hidden_output"> | |
| 564 <assert_contents> | |
| 565 <has_text_matching expression="sa.pp.scrublet"/> | |
| 566 <has_text_matching expression="n_comps = 15"/> | |
| 567 <has_text_matching expression="sim_doublet_ratio = 2.0"/> | |
| 568 <has_text_matching expression="expected_doublet_rate = 0.1"/> | |
| 569 <has_text_matching expression="random_state = 0"/> | |
| 570 </assert_contents> | |
| 571 </output> | |
| 572 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/> | |
| 573 </test> | |
| 574 <test expect_num_outputs="2"> | |
| 575 <!-- pp.filter_doublets --> | |
| 576 <conditional name="method"> | |
| 577 <param name="method" value="pp.filter_doublets"/> | |
| 578 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.scrublet.pbmc_500_chr21.h5ad"/> | |
| 579 <param name="probability_threshold" value="0.1"/> | |
| 580 </conditional> | |
| 581 <section name="advanced_common"> | |
| 582 <param name="show_log" value="true" /> | |
| 583 </section> | |
| 584 <output name="hidden_output"> | |
| 585 <assert_contents> | |
| 586 <has_text_matching expression="sa.pp.filter_doublets"/> | |
| 587 </assert_contents> | |
| 588 </output> | |
| 589 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.filter_doublets.pbmc_500_chr21.h5ad"/> | |
| 590 </test> | |
| 591 <test expect_num_outputs="2"> | |
| 592 <!-- pp.mnc_correct --> | |
| 593 <conditional name="method"> | |
| 594 <param name="method" value="pp.mnc_correct"/> | |
| 595 <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> | |
| 596 <param name="batch" value="batch"/> | |
| 597 <param name="n_neighbors" value="3"/> | |
| 598 <param name="n_clusters" value="10"/> | |
| 599 <param name="use_rep" value="X_spectral"/> | |
| 600 </conditional> | |
| 601 <section name="advanced_common"> | |
| 602 <param name="show_log" value="true" /> | |
| 603 </section> | |
| 604 <output name="hidden_output"> | |
| 605 <assert_contents> | |
| 606 <has_text_matching expression="sa.pp.mnc_correct"/> | |
| 607 <has_text_matching expression="batch = 'batch'"/> | |
| 608 <has_text_matching expression="n_neighbors = 3"/> | |
| 609 <has_text_matching expression="n_clusters = 10"/> | |
| 610 <has_text_matching expression="batch = 'batch'"/> | |
| 611 <has_text_matching expression="use_rep = 'X_spectral'"/> | |
| 612 </assert_contents> | |
| 613 </output> | |
| 614 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.mnc_correct.pbmc_500_chr21.h5ad"/> | |
| 615 </test> | |
| 616 <test expect_num_outputs="2"> | |
| 617 <!-- pp.harmony --> | |
| 618 <conditional name="method"> | |
| 619 <param name="method" value="pp.harmony"/> | |
| 620 <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> | |
| 621 <param name="batch" value="batch"/> | |
| 622 <param name="use_rep" value="X_spectral"/> | |
| 623 </conditional> | |
| 624 <section name="advanced_common"> | |
| 625 <param name="show_log" value="true" /> | |
| 626 </section> | |
| 627 <output name="hidden_output"> | |
| 628 <assert_contents> | |
| 629 <has_text_matching expression="sa.pp.harmony"/> | |
| 630 <has_text_matching expression="batch = 'batch'"/> | |
| 631 <has_text_matching expression="use_rep = 'X_spectral'"/> | |
| 632 </assert_contents> | |
| 633 </output> | |
| 634 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.harmony.pbmc_500_chr21.h5ad"/> | |
| 635 </test> | |
| 636 <test expect_num_outputs="2"> | |
| 637 <!-- pp.scanorama_integrate --> | |
| 638 <conditional name="method"> | |
| 639 <param name="method" value="pp.scanorama_integrate"/> | |
| 640 <param name="adata" location="https://zenodo.org/records/11260316/files/pbmc_500_chr21.batch.h5ad"/> | |
| 641 <param name="batch" value="batch"/> | |
| 642 <param name="use_rep" value="X_spectral"/> | |
| 643 </conditional> | |
| 644 <section name="advanced_common"> | |
| 645 <param name="show_log" value="true" /> | |
| 646 </section> | |
| 647 <output name="hidden_output"> | |
| 648 <assert_contents> | |
| 649 <has_text_matching expression="sa.pp.scanorama_integrate"/> | |
| 650 <has_text_matching expression="batch = 'batch'"/> | |
| 651 <has_text_matching expression="use_rep = 'X_spectral'"/> | |
| 652 </assert_contents> | |
| 653 </output> | |
| 654 <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.scanorama_integrate.pbmc_500_chr21.h5ad"/> | |
| 655 </test> | |
| 656 <test expect_num_outputs="2"> | |
| 657 <!-- metrics.frag_size_distr --> | |
| 658 <conditional name="method"> | |
| 659 <param name="method" value="metrics.frag_size_distr"/> | |
| 660 <param name="adata" location="https://zenodo.org/records/11260316/files/pp.import_data.pbmc_500_chr21.h5ad"/> | |
| 661 <param name="max_recorded_size" value="500"/> | |
| 662 <param name="add_key" value="frag_size_distr"/> | |
| 663 </conditional> | |
| 664 <section name="advanced_common"> | |
| 665 <param name="show_log" value="true" /> | |
| 666 </section> | |
| 667 <output name="hidden_output"> | |
| 668 <assert_contents> | |
| 669 <has_text_matching expression="sa.metrics.frag_size_distr"/> | |
| 670 <has_text_matching expression="add_key = 'frag_size_distr'"/> | |
| 671 </assert_contents> | |
| 672 </output> | |
| 673 <output name="anndata_out" location="https://zenodo.org/records/11260316/files/metrics.frag_size_distr.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1" /> | |
| 674 </test> | |
| 675 </tests> | |
| 676 <help><![CDATA[ | |
| 677 Convert a BAM file to a fragment file, using `pp.make_fragment_file` | |
| 678 ==================================================================== | |
| 679 | |
| 680 Convert a BAM file to a fragment file. | |
| 681 | |
| 682 Convert a BAM file to a fragment file by performing the following steps: | |
| 683 | |
| 684 - Filtering: remove reads that are unmapped, not primary alignment, mapq < 30, fails platform/vendor quality checks, or optical duplicate. For paired-end sequencing, it also removes reads that are not properly aligned. | |
| 685 | |
| 686 - Deduplicate: Sort the reads by cell barcodes and remove duplicated reads for each unique cell barcode. | |
| 687 | |
| 688 - Output: Convert BAM records to fragments (if paired-end) or single-end reads. | |
| 689 | |
| 690 The bam file needn’t be sorted or filtered. | |
| 691 | |
| 692 More details on the `SnapATAC2 documentation | |
| 693 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_fragment_file.html>`__ | |
| 694 | |
| 695 Import data fragment files and compute basic QC metrics, using `pp.import_data` | |
| 696 =============================================================================== | |
| 697 | |
| 698 Import data fragment files and compute basic QC metrics. | |
| 699 | |
| 700 A fragment refers to the sequence data originating from a distinct location in the genome. In single-ended sequencing, one read equates to a fragment. However, in paired-ended sequencing, a fragment is defined by a pair of reads. This function is designed to handle, store, and process input files with fragment data, further yielding a range of basic Quality Control (QC) metrics. These metrics include the total number of unique fragments, duplication rates, and the percentage of mitochondrial DNA detected. | |
| 701 | |
| 702 How fragments are stored is dependent on the sequencing approach utilized. For single-ended sequencing, fragments are found in `.obsm['fragment_single']`. In contrast, for paired-ended sequencing, they are located in `.obsm['fragment_paired']`. | |
| 703 | |
| 704 More details on the `SnapATAC2 documentation | |
| 705 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.import_data.html>`__ | |
| 706 | |
| 707 Generate cell by bin count matrix, using `pp.add_tile_matrix` | |
| 708 ============================================================= | |
| 709 | |
| 710 Generate cell by bin count matrix. | |
| 711 | |
| 712 This function is used to generate and add a cell by bin count matrix to the AnnData object. | |
| 713 | |
| 714 `import_data` must be run first in order to use this function. | |
| 715 | |
| 716 More details on the `SnapATAC2 documentation | |
| 717 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.add_tile_matrix.html>`__ | |
| 718 | |
| 719 Generate cell by gene activity matrix, using `pp.make_gene_matrix` | |
| 720 ================================================================== | |
| 721 | |
| 722 Generate cell by gene activity matrix. | |
| 723 | |
| 724 Generate cell by gene activity matrix by counting the TN5 insertions in gene body regions. The result will be stored in a new file and a new AnnData object will be created. | |
| 725 | |
| 726 `import_data` must be run first in order to use this function. | |
| 727 | |
| 728 More details on the `SnapATAC2 documentation | |
| 729 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_gene_matrix.html>`__ | |
| 730 | |
| 731 Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells` | |
| 732 ============================================================================================ | |
| 733 | |
| 734 Filter cell outliers based on counts and numbers of genes expressed. For instance, only keep cells with at least `min_counts` counts or `min_tsse` TSS enrichment scores. This is to filter measurement outliers, i.e. “unreliable” observations. | |
| 735 | |
| 736 More details on the `SnapATAC2 documentation | |
| 737 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_cells.html>`__ | |
| 738 | |
| 739 Perform feature selection, using `pp.select_features` | |
| 740 ===================================================== | |
| 741 | |
| 742 Perform feature selection by selecting the most accessible features across all cells, unless `max_iter` > 1. | |
| 743 | |
| 744 More details on the `SnapATAC2 documentation | |
| 745 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.select_features.html>`__ | |
| 746 | |
| 747 Compute probability of being a doublet using the scrublet algorithm, using `pp.scrublet` | |
| 748 ======================================================================================== | |
| 749 | |
| 750 Compute probability of being a doublet using the scrublet algorithm. | |
| 751 | |
| 752 This function identifies doublets by generating simulated doublets using randomly pairing chromatin accessibility profiles of individual cells. The simulated doublets are then embedded alongside the original cells using the spectral embedding algorithm in this package. A k-nearest-neighbor classifier is trained to distinguish between the simulated doublets and the authentic cells. This trained classifier produces a “doublet score” for each cell. The doublet scores are then converted into probabilities using a Gaussian mixture model. | |
| 753 | |
| 754 More details on the `SnapATAC2 documentation | |
| 755 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.scrublet.html>`__ | |
| 756 | |
| 757 Remove doublets according to the doublet probability or doublet score, using `pp.filter_doublets` | |
| 758 ================================================================================================= | |
| 759 | |
| 760 Remove doublets according to the doublet probability or doublet score. | |
| 761 | |
| 762 The user can choose to remove doublets by either the doublet probability or the doublet score. `scrublet` must be run first in order to use this function. | |
| 763 | |
| 764 More details on the `SnapATAC2 documentation | |
| 765 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_doublets.html>`__ | |
| 766 | |
| 767 A modified MNN-Correct algorithm based on cluster centroid, using `pp.mnc_correct` | |
| 768 ================================================================================== | |
| 769 | |
| 770 A modified MNN-Correct algorithm based on cluster centroid. | |
| 771 | |
| 772 More details on the `SnapATAC2 documentation | |
| 773 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.mnc_correct.html>`__ | |
| 774 | |
| 775 Use harmonypy to integrate different experiments, using `pp.harmony` | |
| 776 ==================================================================== | |
| 777 | |
| 778 Use harmonypy to integrate different experiments. | |
| 779 | |
| 780 Harmony is an algorithm for integrating single-cell data from multiple experiments. This function uses the python port of Harmony, `harmonypy`, to integrate single-cell data stored in an AnnData object. This function should be run after performing dimension reduction. | |
| 781 | |
| 782 More details on the `SnapATAC2 documentation | |
| 783 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.harmony.html>`__ | |
| 784 | |
| 785 Use Scanorama to integrate different experiments, using `pp.scanorama_integrate` | |
| 786 ======================================================================================== | |
| 787 | |
| 788 Use Scanorama to integrate different experiments. | |
| 789 | |
| 790 Scanorama is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This function should be run after performing `tl.spectral` but before computing the neighbor graph. | |
| 791 | |
| 792 More details on the `SnapATAC2 documentation | |
| 793 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.scanorama_integrate.html>`__ | |
| 794 | |
| 795 Compute the fragment size distribution of the dataset, using `metrics.frag_size_distr` | |
| 796 ====================================================================================== | |
| 797 | |
| 798 Compute the fragment size distribution of the dataset. | |
| 799 | |
| 800 This function computes the fragment size distribution of the dataset. Note that it does not operate at the single-cell level. The result is stored in a vector where each element represents the number of fragments and the index represents the fragment length. The first position of the vector is reserved for fragments with size larger than the `max_recorded_size` parameter. `import_data` must be run first in order to use this function. | |
| 801 | |
| 802 More details on the `SnapATAC2 documentation | |
| 803 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.metrics.frag_size_distr.html>`__ | |
| 804 | |
| 805 Compute the TSS enrichment score (TSSe) for each cell, using `metrics.tsse` | |
| 806 =========================================================================== | |
| 807 | |
| 808 Compute the TSS enrichment score (TSSe) for each cell. | |
| 809 | |
| 810 `import_data` must be run first in order to use this function. | |
| 811 | |
| 812 More details on the `SnapATAC2 documentation | |
| 813 <https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.metrics.tsse.html>`__ | |
| 814 | |
| 815 ]]></help> | |
| 816 <expand macro="citations"/> | |
| 817 </tool> |
