Mercurial > repos > eschen42 > w4mkmeans
comparison w4m_general_purpose_routines.R @ 6:3f72a635a075 draft default tip
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 2799299a221358b648334c3f890c4024155af73b
| author | eschen42 |
|---|---|
| date | Fri, 02 Mar 2018 08:32:17 -0500 |
| parents | 330ee1d840db |
| children |
comparison
equal
deleted
inserted
replaced
| 5:6817b036b06e | 6:3f72a635a075 |
|---|---|
| 1 ##----------------------------------------------- | |
| 2 ## helper functions for error detection/reporting | |
| 3 ##----------------------------------------------- | |
| 4 | |
| 5 # ISO 8601 date ref: https://en.wikipedia.org/wiki/ISO_8601 | |
| 6 iso_date <- function() { | |
| 7 format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z") | |
| 8 } | |
| 9 | |
| 10 # log-printing to stderr | |
| 11 log_print <- function(x, ...) { | |
| 12 cat( | |
| 13 sep="" | |
| 14 , file=stderr() | |
| 15 , iso_date() | |
| 16 , " " | |
| 17 , c(x, ...) | |
| 18 , "\n" | |
| 19 ) | |
| 20 } | |
| 21 | |
| 22 # format error for logging | |
| 23 format_error <- function(e) { | |
| 24 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ") | |
| 25 } | |
| 26 | |
| 27 # tryCatchFunc produces a list | |
| 28 # func - a function that takes no arguments | |
| 29 # On success of func(), tryCatchFunc produces | |
| 30 # list(success = TRUE, value = func(), msg = "") | |
| 31 # On failure of func(), tryCatchFunc produces | |
| 32 # list(success = FALSE, value = NA, msg = "the error message") | |
| 33 tryCatchFunc <- function(func) { | |
| 34 retval <- NULL | |
| 35 tryCatch( | |
| 36 expr = { | |
| 37 retval <- ( list( success = TRUE, value = func(), msg = "" ) ) | |
| 38 } | |
| 39 , error = function(e) { | |
| 40 retval <<- list( success = FALSE, value = NA, msg = format_error(e) ) | |
| 41 } | |
| 42 ) | |
| 43 return (retval) | |
| 44 } | |
| 45 | |
| 1 # prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA) | 46 # prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA) |
| 2 # - Motivation: | 47 # - Motivation: |
| 3 # - Selection: | 48 # - Selection: |
| 4 # - You may want to exclude several samples from your analysis: | 49 # - You may want to exclude several samples from your analysis: |
| 5 # - If so, set the argument 'exclude.samples' to a vector of sample names | 50 # - If so, set the argument 'exclude.samples' to a vector of sample names |
| 6 # - You may want to exclude several features or features from your analysis: | 51 # - You may want to exclude several features or features from your analysis: |
| 7 # - If so, set the argument 'exclude.features' to a vector of feature names | 52 # - If so, set the argument 'exclude.features' to a vector of feature names |
| 8 # - Renaming samples: | 53 # - Renaming samples: |
| 9 # - You may want to rename several samples from your analysis: | 54 # - You may want to rename several samples from your analysis: |
| 10 # - If so, set the argument 'sample.rename.function' to a function accepting a vector | 55 # - If so, set the argument 'sample.rename.function' to a function accepting a vector |
| 11 # of sample names and producing a vector of strings of equivalent length | 56 # of sample names and producing a vector of strings of equivalent length |
| 12 # - MVA is confounded by missing values. | 57 # - MVA is confounded by missing values. |
| 13 # - By default, this function imputes missing values as zero. | 58 # - By default, this function imputes missing values as zero. |
| 14 # - For a different imputation, set the 'data.imputation' argument to a function | 59 # - For a different imputation, set the 'data.imputation' argument to a function |
| 15 # accepting a single matrix argument and returning a matrix of the same | 60 # accepting a single matrix argument and returning a matrix of the same |
| 17 # - Transformation | 62 # - Transformation |
| 18 # - It may be desirable to transform the intensity data to reduce the range. | 63 # - It may be desirable to transform the intensity data to reduce the range. |
| 19 # - By default, this function performs an eigth-root transformation: | 64 # - By default, this function performs an eigth-root transformation: |
| 20 # - Any root-tranformation has the advantage of never being negative. | 65 # - Any root-tranformation has the advantage of never being negative. |
| 21 # - Calculation of the eight-root is four times faster in my hands than log10. | 66 # - Calculation of the eight-root is four times faster in my hands than log10. |
| 22 # - However, it has the disadvantage that calculation of fold-differences | 67 # - However, it has the disadvantage that calculation of fold-differences |
| 23 # is not additive as with log-transformation. | 68 # is not additive as with log-transformation. |
| 24 # - Rather, you must divide the values and raise to the eighth power. | 69 # - Rather, you must divide the values and raise to the eighth power. |
| 25 # - For a different transformation, set the 'data.transformation' argument | 70 # - For a different transformation, set the 'data.transformation' argument |
| 26 # to a function accepting a single matrix argument. | 71 # to a function accepting a single matrix argument. |
| 27 # - The function should be written to return a matrix of the same dimensions | 72 # - The function should be written to return a matrix of the same dimensions |
| 105 , data.transformation = function(x) { | 150 , data.transformation = function(x) { |
| 106 sqrt( sqrt( sqrt(x) ) ) | 151 sqrt( sqrt( sqrt(x) ) ) |
| 107 } | 152 } |
| 108 , en = new.env() | 153 , en = new.env() |
| 109 ) { | 154 ) { |
| 155 # log to environment | |
| 156 if ( !exists("log", envir = en) ) { | |
| 157 en$log <- c() | |
| 158 } | |
| 159 enlog <- function(s) { en$log <- c(en$log, s); s } | |
| 160 #enlog("foo") | |
| 161 | |
| 110 # MatVar - Compute variance of rows or columns of a matrix | 162 # MatVar - Compute variance of rows or columns of a matrix |
| 111 # ref: http://stackoverflow.com/a/25100036 | 163 # ref: http://stackoverflow.com/a/25100036 |
| 112 # For row variance, dim == 1, for col variance, dim == 2 | 164 # For row variance, dim == 1, for col variance, dim == 2 |
| 113 MatVar <- function(x, dim = 1) { | 165 MatVar <- function(x, dim = 1) { |
| 114 if (dim == 1) { | 166 if (dim == 1) { |
| 135 } else stop("Please enter valid dimension, for rows, dim = 1; for colums, dim = 2") | 187 } else stop("Please enter valid dimension, for rows, dim = 1; for colums, dim = 2") |
| 136 } | 188 } |
| 137 | 189 |
| 138 nonzero.var <- function(x) { | 190 nonzero.var <- function(x) { |
| 139 if (nrow(x) == 0) { | 191 if (nrow(x) == 0) { |
| 140 print(str(x)) | |
| 141 stop("matrix has no rows") | 192 stop("matrix has no rows") |
| 142 } | 193 } |
| 143 if (ncol(x) == 0) { | 194 if (ncol(x) == 0) { |
| 144 print(str(x)) | |
| 145 stop("matrix has no columns") | 195 stop("matrix has no columns") |
| 146 } | 196 } |
| 147 if ( is.numeric(x) ) { | 197 if ( is.numeric(x) ) { |
| 148 # exclude any rows with zero variance | 198 # exclude any rows with zero variance |
| 149 row.vars <- MatVar(x, dim = 1) | 199 row.vars <- MatVar(x, dim = 1) |
| 151 nonzero.rows <- row.vars[nonzero.row.vars] | 201 nonzero.rows <- row.vars[nonzero.row.vars] |
| 152 if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) { | 202 if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) { |
| 153 row.names <- attr(nonzero.rows,"names") | 203 row.names <- attr(nonzero.rows,"names") |
| 154 x <- x[ row.names, , drop = FALSE ] | 204 x <- x[ row.names, , drop = FALSE ] |
| 155 } | 205 } |
| 156 | 206 |
| 157 # exclude any columns with zero variance | 207 # exclude any columns with zero variance |
| 158 column.vars <- MatVar(x, dim = 2) | 208 column.vars <- MatVar(x, dim = 2) |
| 159 nonzero.column.vars <- column.vars > 0 | 209 nonzero.column.vars <- column.vars > 0 |
| 160 nonzero.columns <- column.vars[nonzero.column.vars] | 210 nonzero.columns <- column.vars[nonzero.column.vars] |
| 161 if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) { | 211 if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) { |
| 168 | 218 |
| 169 if (is.null(x.matrix)) { | 219 if (is.null(x.matrix)) { |
| 170 stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix") | 220 stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix") |
| 171 } | 221 } |
| 172 | 222 |
| 223 enlog("prepare.data.matrix - get matrix") | |
| 224 | |
| 173 en$xpre <- x <- x.matrix | 225 en$xpre <- x <- x.matrix |
| 174 | 226 |
| 175 # exclude any samples as indicated | 227 # exclude any samples as indicated |
| 176 if ( !is.null(exclude.features) ) { | 228 if ( !is.null(exclude.features) ) { |
| 229 enlog("prepare.data.matrix - exclude any samples as indicated") | |
| 177 my.colnames <- colnames(x) | 230 my.colnames <- colnames(x) |
| 178 my.col.diff <- setdiff(my.colnames, exclude.features) | 231 my.col.diff <- setdiff(my.colnames, exclude.features) |
| 179 x <- x[ , my.col.diff , drop = FALSE ] | 232 x <- x[ , my.col.diff , drop = FALSE ] |
| 180 } | 233 } |
| 181 | 234 |
| 182 # exclude any features as indicated | 235 # exclude any features as indicated |
| 183 if ( !is.null(exclude.samples) ) { | 236 if ( !is.null(exclude.samples) ) { |
| 237 enlog("prepare.data.matrix - exclude any features as indicated") | |
| 184 my.rownames <- rownames(x) | 238 my.rownames <- rownames(x) |
| 185 my.row.diff <- setdiff(my.rownames, exclude.samples) | 239 my.row.diff <- setdiff(my.rownames, exclude.samples) |
| 186 x <- x[ my.row.diff, , drop = FALSE ] | 240 x <- x[ my.row.diff, , drop = FALSE ] |
| 187 } | 241 } |
| 188 | 242 |
| 189 # rename rows if desired | 243 # rename rows if desired |
| 190 if ( !is.null(sample.rename.function) ) { | 244 if ( !is.null(sample.rename.function) ) { |
| 245 enlog("prepare.data.matrix - rename rows if desired") | |
| 191 renamed <- sample.rename.function(x) | 246 renamed <- sample.rename.function(x) |
| 192 rownames(x) <- renamed | 247 rownames(x) <- renamed |
| 193 } | 248 } |
| 194 | 249 |
| 250 enlog("prepare.data.matrix - save redacted x.datamatrix to environment") | |
| 251 | |
| 195 # save redacted x.datamatrix to environment | 252 # save redacted x.datamatrix to environment |
| 196 en$redacted.data.matrix <- x | 253 en$redacted.data.matrix <- x |
| 197 | 254 |
| 198 # impute values missing from the x.datamatrix | 255 # impute values missing from the x.datamatrix |
| 199 if ( !is.null(data.imputation) ) { | 256 if ( !is.null(data.imputation) ) { |
| 257 enlog("prepare.data.matrix - impute values missing from the x.datamatrix") | |
| 200 x <- data.imputation(x) | 258 x <- data.imputation(x) |
| 201 } | 259 } |
| 202 | 260 |
| 203 # perform transformation if desired | 261 # perform transformation if desired |
| 204 if ( !is.null(data.transformation) ) { | 262 if ( !is.null(data.transformation) ) { |
| 263 enlog("prepare.data.matrix - perform transformation") | |
| 205 x <- data.transformation(x) | 264 x <- data.transformation(x) |
| 206 } else { | 265 } else { |
| 207 x <- x | 266 x <- x |
| 208 } | 267 } |
| 209 | 268 |
| 210 # purge rows and columns that have zero variance | 269 # purge rows and columns that have zero variance |
| 211 if ( is.numeric(x) ) { | 270 if ( is.numeric(x) ) { |
| 271 enlog("prepare.data.matrix - purge rows and columns that have zero variance") | |
| 212 x <- nonzero.var(x) | 272 x <- nonzero.var(x) |
| 213 } | 273 } |
| 214 | 274 |
| 215 # save imputed, transformed x.datamatrix to environment | 275 # save imputed, transformed x.datamatrix to environment |
| 216 en$imputed.transformed.data.matrix <- x | 276 en$imputed.transformed.data.matrix <- x |
| 217 | 277 |
| 218 return(x) | 278 return(x) |
| 219 } | 279 } |
| 220 | 280 |
| 221 | 281 # vim: sw=2 ts=2 et : |
| 222 ##----------------------------------------------- | |
| 223 ## helper functions for error detection/reporting | |
| 224 ##----------------------------------------------- | |
| 225 | |
| 226 # log-printing to stderr | |
| 227 log_print <- function(x, ...) { | |
| 228 cat( | |
| 229 format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z") | |
| 230 , " " | |
| 231 , c(x, ...) | |
| 232 , "\n" | |
| 233 , sep="" | |
| 234 , file=stderr() | |
| 235 ) | |
| 236 } | |
| 237 | |
| 238 # tryCatchFunc produces a list | |
| 239 # On success of expr(), tryCatchFunc produces | |
| 240 # list(success TRUE, value = expr(), msg = "") | |
| 241 # On failure of expr(), tryCatchFunc produces | |
| 242 # list(success = FALSE, value = NA, msg = "the error message") | |
| 243 tryCatchFunc <- function(expr) { | |
| 244 # format error for logging | |
| 245 format_error <- function(e) { | |
| 246 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ") | |
| 247 } | |
| 248 my_expr <- expr | |
| 249 retval <- NULL | |
| 250 tryCatch( | |
| 251 expr = { | |
| 252 retval <- ( list( success = TRUE, value = my_expr(), msg = "" ) ) | |
| 253 } | |
| 254 , error = function(e) { | |
| 255 retval <<- list( success = FALSE, value = NA, msg = format_error(e) ) | |
| 256 } | |
| 257 ) | |
| 258 return (retval) | |
| 259 } | |
| 260 | |
| 261 # tryCatchProc produces a list | |
| 262 # On success of expr(), tryCatchProc produces | |
| 263 # list(success TRUE, msg = "") | |
| 264 # On failure of expr(), tryCatchProc produces | |
| 265 # list(success = FALSE, msg = "the error message") | |
| 266 tryCatchProc <- function(expr) { | |
| 267 # format error for logging | |
| 268 format_error <- function(e) { | |
| 269 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ") | |
| 270 } | |
| 271 retval <- NULL | |
| 272 tryCatch( | |
| 273 expr = { | |
| 274 expr() | |
| 275 retval <- ( list( success = TRUE, msg = "" ) ) | |
| 276 } | |
| 277 , error = function(e) { | |
| 278 retval <<- list( success = FALSE, msg = format_error(e) ) | |
| 279 } | |
| 280 ) | |
| 281 return (retval) | |
| 282 } | |
| 283 |
