Mercurial > repos > recetox > recetox_aplcms_adjust_time
comparison utils.R @ 0:5c7f041a5cf9 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 19de0924a65bc65cbbf7c1fc17e9b5348305f95c
| author | recetox |
|---|---|
| date | Fri, 10 Jun 2022 10:11:55 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5c7f041a5cf9 |
|---|---|
| 1 library(recetox.aplcms) | |
| 2 | |
| 3 align_features <- function(sample_names, ...) { | |
| 4 aligned <- feature.align(...) | |
| 5 feature_names <- seq_len(nrow(aligned$pk.times)) | |
| 6 | |
| 7 list( | |
| 8 mz_tolerance = as.numeric(aligned$mz.tol), | |
| 9 rt_tolerance = as.numeric(aligned$chr.tol), | |
| 10 rt_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$pk.times), | |
| 11 int_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$aligned.ftrs) | |
| 12 ) | |
| 13 } | |
| 14 | |
| 15 get_sample_name <- function(filename) { | |
| 16 tools::file_path_sans_ext(basename(filename)) | |
| 17 } | |
| 18 | |
| 19 as_feature_crosstab <- function(feature_names, sample_names, data) { | |
| 20 colnames(data) <- c("mz", "rt", "mz_min", "mz_max", sample_names) | |
| 21 rownames(data) <- feature_names | |
| 22 as.data.frame(data) | |
| 23 } | |
| 24 | |
| 25 as_feature_sample_table <- function(rt_crosstab, int_crosstab) { | |
| 26 feature_names <- rownames(rt_crosstab) | |
| 27 sample_names <- colnames(rt_crosstab)[- (1:4)] | |
| 28 | |
| 29 feature_table <- data.frame( | |
| 30 feature = feature_names, | |
| 31 mz = rt_crosstab[, 1], | |
| 32 rt = rt_crosstab[, 2] | |
| 33 ) | |
| 34 | |
| 35 # series of conversions to produce a table type from data.frame | |
| 36 rt_crosstab <- as.table(as.matrix(rt_crosstab[, - (1:4)])) | |
| 37 int_crosstab <- as.table(as.matrix(int_crosstab[, - (1:4)])) | |
| 38 | |
| 39 crosstab_axes <- list(feature = feature_names, sample = sample_names) | |
| 40 dimnames(rt_crosstab) <- dimnames(int_crosstab) <- crosstab_axes | |
| 41 | |
| 42 x <- as.data.frame(rt_crosstab, responseName = "sample_rt") | |
| 43 y <- as.data.frame(int_crosstab, responseName = "sample_intensity") | |
| 44 | |
| 45 data <- merge(x, y, by = c("feature", "sample")) | |
| 46 data <- merge(feature_table, data, by = "feature") | |
| 47 data | |
| 48 } | |
| 49 | |
| 50 load_features <- function(files) { | |
| 51 files_list <- sort_samples_by_acquisition_number(files) | |
| 52 features <- lapply(files_list, arrow::read_parquet) | |
| 53 features <- lapply(features, as.matrix) | |
| 54 return(features) | |
| 55 } | |
| 56 | |
| 57 save_data_as_parquet_files <- function(data, subdir) { | |
| 58 dir.create(subdir) | |
| 59 for (i in 0:(length(data) - 1)) { | |
| 60 filename <- file.path(subdir, paste0(subdir, "_features_", i, ".parquet")) | |
| 61 arrow::write_parquet(as.data.frame(data[i + 1]), filename) | |
| 62 } | |
| 63 } | |
| 64 | |
| 65 save_aligned_features <- function(aligned, rt_file, int_file, tol_file) { | |
| 66 arrow::write_parquet(as.data.frame(aligned$rt_crosstab), rt_file) | |
| 67 arrow::write_parquet(as.data.frame(aligned$int_crosstab), int_file) | |
| 68 | |
| 69 mz_tolerance <- c(aligned$mz_tolerance) | |
| 70 rt_tolerance <- c(aligned$rt_tolerance) | |
| 71 arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) | |
| 72 } | |
| 73 | |
| 74 load_aligned_features <- function(rt_file, int_file, tol_file) { | |
| 75 rt_cross_table <- arrow::read_parquet(rt_file) | |
| 76 int_cross_table <- arrow::read_parquet(int_file) | |
| 77 tolerances_table <- arrow::read_parquet(tol_file) | |
| 78 | |
| 79 result <- list() | |
| 80 result$mz_tolerance <- tolerances_table$mz_tolerance | |
| 81 result$rt_tolerance <- tolerances_table$rt_tolerance | |
| 82 result$rt_crosstab <- rt_cross_table | |
| 83 result$int_crosstab <- int_cross_table | |
| 84 return(result) | |
| 85 } | |
| 86 | |
| 87 recover_signals <- function(cluster, | |
| 88 filenames, | |
| 89 extracted, | |
| 90 corrected, | |
| 91 aligned, | |
| 92 mz_tol = 1e-05, | |
| 93 mz_range = NA, | |
| 94 rt_range = NA, | |
| 95 use_observed_range = TRUE, | |
| 96 min_bandwidth = NA, | |
| 97 max_bandwidth = NA, | |
| 98 recover_min_count = 3) { | |
| 99 if (!is(cluster, "cluster")) { | |
| 100 cluster <- parallel::makeCluster(cluster) | |
| 101 on.exit(parallel::stopCluster(cluster)) | |
| 102 } | |
| 103 | |
| 104 clusterExport(cluster, c("extracted", "corrected", "aligned", "recover.weaker")) | |
| 105 clusterEvalQ(cluster, library("splines")) | |
| 106 | |
| 107 recovered <- parLapply(cluster, seq_along(filenames), function(i) { | |
| 108 recover.weaker( | |
| 109 loc = i, | |
| 110 filename = filenames[[i]], | |
| 111 this.f1 = extracted[[i]], | |
| 112 this.f2 = corrected[[i]], | |
| 113 pk.times = aligned$rt_crosstab, | |
| 114 aligned.ftrs = aligned$int_crosstab, | |
| 115 orig.tol = mz_tol, | |
| 116 align.mz.tol = aligned$mz_tolerance, | |
| 117 align.chr.tol = aligned$rt_tolerance, | |
| 118 mz.range = mz_range, | |
| 119 chr.range = rt_range, | |
| 120 use.observed.range = use_observed_range, | |
| 121 bandwidth = 0.5, | |
| 122 min.bw = min_bandwidth, | |
| 123 max.bw = max_bandwidth, | |
| 124 recover.min.count = recover_min_count | |
| 125 ) | |
| 126 }) | |
| 127 | |
| 128 feature_table <- aligned$rt_crosstab[, 1:4] | |
| 129 rt_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.times)) | |
| 130 int_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.ftrs)) | |
| 131 | |
| 132 feature_names <- rownames(feature_table) | |
| 133 sample_names <- colnames(aligned$rt_crosstab[, - (1:4)]) | |
| 134 | |
| 135 list( | |
| 136 extracted_features = lapply(recovered, function(x) x$this.f1), | |
| 137 corrected_features = lapply(recovered, function(x) x$this.f2), | |
| 138 rt_crosstab = as_feature_crosstab(feature_names, sample_names, rt_crosstab), | |
| 139 int_crosstab = as_feature_crosstab(feature_names, sample_names, int_crosstab) | |
| 140 ) | |
| 141 } | |
| 142 | |
| 143 create_feature_sample_table <- function(features) { | |
| 144 table <- as_feature_sample_table( | |
| 145 rt_crosstab = features$rt_crosstab, | |
| 146 int_crosstab = features$int_crosstab | |
| 147 ) | |
| 148 return(table) | |
| 149 } |
