Mercurial > repos > galaxyp > lfq_protein_quant
comparison quantitation.r @ 0:5d630f6664c2 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/lfq_protein_quant commit 26ff08776f90f96646598a19cfcf57d42aa4a43b
| author | galaxyp |
|---|---|
| date | Tue, 02 Oct 2018 16:30:03 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5d630f6664c2 |
|---|---|
| 1 ################################# | |
| 2 library(tidyverse) | |
| 3 library(furrr) | |
| 4 library(lme4) | |
| 5 library(MSnbase) | |
| 6 library(MSqRob) | |
| 7 | |
| 8 ##Import and preprocess data | |
| 9 ############################ | |
| 10 MSnSet2df = function(msnset){ | |
| 11 ## Converts Msnset to a tidy dataframe | |
| 12 ## Always creates feature and vector column so these shouldn't be defined by user. | |
| 13 ## convenient for downstream analysis steps. | |
| 14 if(any(c("sample", "feature", "expression") %in% c(colnames(fData(msnset)),colnames(pData(msnset))))){ | |
| 15 stop("Column names in the \"fData\" or \"pData\" slot of the \"msnset\" object cannot be named | |
| 16 \"sample\", \"feature\" or \"expression\". Please rename these columns before running the analysis.") | |
| 17 } | |
| 18 | |
| 19 dt <- as.data.frame(Biobase::exprs(msnset)) %>% mutate(feature = rownames(.)) %>% | |
| 20 gather(sample, expression, - feature, na.rm=TRUE) | |
| 21 dt <- fData(msnset) %>% mutate(feature = rownames(.)) %>% left_join(dt,. , by = 'feature') | |
| 22 dt <- pData(msnset) %>% mutate(sample = rownames(.)) %>% left_join(dt,. , by = 'sample') | |
| 23 as_data_frame(dt) | |
| 24 } | |
| 25 | |
| 26 ## robust summarisation | |
| 27 do_robust_summaristion = function(msnset, group_var = Proteins, keep_fData_cols = NULL, nIter = 20, | |
| 28 sum_fun = summarizeRobust){ | |
| 29 ## TODO use funture_map instead of mutate to speed up | |
| 30 ## Uses assumption that featureNames and sampleNames exist in every msnset | |
| 31 ## Can also be used for multiple rounds of normalization, e.g. first from PSMs to peptides, then from peptides to proteins | |
| 32 system.time({## Time how long it takes | |
| 33 group_var <- enquo(group_var) ;#group_var = quo(Proteins) | |
| 34 ## Make tidy dataframe from Msnset | |
| 35 df <- MSnSet2df(msnset) | |
| 36 ## Do summarisision according defined groups | |
| 37 dt <- filter(df, !is.na(expression)) %>% group_by(!!group_var) %>% | |
| 38 mutate(expression = sum_fun(expression, feature, sample, nIter = nIter)) %>% | |
| 39 dplyr::select(!!group_var, sample, expression) %>% | |
| 40 ## collapse to one value per group | |
| 41 distinct | |
| 42 ## Construct an Msnset object from dataframe | |
| 43 dt_exprs <- spread(dt, sample, expression) %>% ungroup | |
| 44 exprs_data <- dplyr::select(dt_exprs, - !!group_var) %>% as.matrix | |
| 45 rownames(exprs_data) <- as.character(pull(dt_exprs, !!group_var)) | |
| 46 | |
| 47 fd <- dplyr::select(dt_exprs,!!group_var) | |
| 48 | |
| 49 ##Select the group variable and all variables you want to keep | |
| 50 if (!is.null(keep_fData_cols)){ | |
| 51 fd_ext <- dplyr::select(df, !!group_var, one_of(keep_fData_cols)) %>% distinct | |
| 52 if(nrow(fd)!=nrow(fd_ext)){ | |
| 53 stop("Values in the \"group_var\" column can only correspond to a single value in the \"keep_fData_cols\" column.") | |
| 54 } | |
| 55 fd <- left_join(fd,fd_ext) | |
| 56 } | |
| 57 | |
| 58 fd <- as.data.frame(fd) | |
| 59 rownames(fd) <- as.character(pull(fd, !!group_var)) | |
| 60 out <- MSnSet(exprs_data, fData = AnnotatedDataFrame(fd) , pData = pData(msnset)[colnames(exprs_data),,drop = FALSE]) | |
| 61 }) %>% print | |
| 62 out | |
| 63 } | |
| 64 | |
| 65 summarizeRobust <- function(expression, feature, sample, nIter=100,...) { | |
| 66 ## Assumes that intensities mx are already log-transformed | |
| 67 ## characters are faster to construct and work with then factors | |
| 68 feature <- as.character(feature) | |
| 69 ##If there is only one 1 peptide for all samples return expression of that peptide | |
| 70 if (length(unique(feature)) == 1L) return(expression) | |
| 71 sample <- as.character(sample) | |
| 72 ## modelmatrix breaks on factors with 1 level so make vector of ones (will be intercept) | |
| 73 if (length(unique(sample)) == 1L) sample = rep(1,length(sample)) | |
| 74 | |
| 75 ## sum contrast on peptide level so sample effect will be mean over all peptides instead of reference level | |
| 76 X = model.matrix(~ -1 + sample + feature,contrasts.arg = list(feature = 'contr.sum')) | |
| 77 ## MasS::rlm breaks on singulare values. | |
| 78 ## check with base lm if singular values are present. | |
| 79 ## if so, these coefficients will be zero, remove this collumn from model matrix | |
| 80 ## rinse and repeat on reduced modelmatrx till no singular values are present | |
| 81 repeat { | |
| 82 fit = .lm.fit(X,expression) | |
| 83 id = fit$coefficients != 0 | |
| 84 X = X[ , id, drop =FALSE] | |
| 85 if (!any(!id)) break | |
| 86 } | |
| 87 ## Last step is always rlm | |
| 88 fit = MASS::rlm(X,expression,maxit = nIter,...) | |
| 89 ## Only return the estimated effects effects as summarised values | |
| 90 sampleid = seq_along(unique(sample)) | |
| 91 return(X[,sampleid,drop = FALSE] %*% fit$coefficients[sampleid]) | |
| 92 } | |
| 93 | |
| 94 | |
| 95 | |
| 96 | |
| 97 | |
| 98 | |
| 99 | |
| 100 | |
| 101 ## mixed models | |
| 102 ############### | |
| 103 setGeneric ( | |
| 104 name= "getBetaB", | |
| 105 def=function(model,...){standardGeneric("getBetaB")} | |
| 106 ) | |
| 107 | |
| 108 .getBetaBMermod = function(model) { | |
| 109 betaB <- c(as.vector(getME(model,"beta")),as.vector(getME(model,"b"))) | |
| 110 names(betaB) <- c(colnames(getME(model,"X")),rownames(getME(model,"Zt"))) | |
| 111 betaB | |
| 112 } | |
| 113 setMethod("getBetaB", "lmerMod", .getBetaBMermod) | |
| 114 | |
| 115 .getBetaBGlm = function(model) | |
| 116 model$coefficients | |
| 117 | |
| 118 setGeneric ( | |
| 119 name= "getVcovBetaBUnscaled", | |
| 120 def=function(model,...){standardGeneric("getVcovBetaBUnscaled")} | |
| 121 ) | |
| 122 | |
| 123 setMethod("getBetaB", "glm", .getBetaBGlm) | |
| 124 | |
| 125 .getVcovBetaBUnscaledMermod = function(model){ | |
| 126 ## TODO speed up (see code GAM4) | |
| 127 p <- ncol(getME(model,"X")) | |
| 128 q <- nrow(getME(model,"Zt")) | |
| 129 Ct <- rbind2(t(getME(model,"X")),getME(model,"Zt")) | |
| 130 Ginv <- solve(tcrossprod(getME(model,"Lambda"))+Diagonal(q,1e-18)) | |
| 131 vcovInv <- tcrossprod(Ct) | |
| 132 vcovInv[((p+1):(q+p)),((p+1):(q+p))] <- vcovInv[((p+1):(q+p)),((p+1):(q+p))]+Ginv | |
| 133 | |
| 134 solve(vcovInv) | |
| 135 } | |
| 136 | |
| 137 setMethod("getVcovBetaBUnscaled", "lmerMod", .getVcovBetaBUnscaledMermod) | |
| 138 | |
| 139 .getVcovBetaBUnscaledGlm = function(model) | |
| 140 ## cov.scaled is scaled with the dispersion, "cov.scaled" is without the dispersion! | |
| 141 ## MSqRob::getSigma is needed because regular "sigma" function can return "NaN" when sigma is very small! | |
| 142 ## This might cause contrasts that can be estimated using summary() to be NA with our approach! | |
| 143 summary(model)$cov.scaled/MSqRob::getSigma(model)^2 | |
| 144 | |
| 145 setMethod("getVcovBetaBUnscaled", "glm", .getVcovBetaBUnscaledGlm) | |
| 146 | |
| 147 ## Estimate pvalues contrasts | |
| 148 contrast_helper = function(formula, msnset, contrast = NULL){ | |
| 149 ## Gives back the coefficients you can use to make contrasts with given the formula and dataset | |
| 150 ## If a factor variable is specified (that is present in the formula) all the possible contrasts | |
| 151 ## within this variable are returned | |
| 152 contrast <- enquo(contrast) ;#contrast = quo(condition) | |
| 153 df <- MSnSet2df(msnset) | |
| 154 all_vars <- formula %>% terms %>% delete.response %>% all.vars | |
| 155 names(all_vars) <- all_vars | |
| 156 df[,all_vars] <- map2_dfr(all_vars,df[,all_vars],paste0) | |
| 157 coefficients <- c("(Intercept)", df %>% dplyr::select(all_vars) %>% unlist %>% unique %>% as.character) | |
| 158 if (contrast != ~NULL) { | |
| 159 c <- pull(df,!! contrast) %>% unique %>% sort %>% as.factor | |
| 160 comp <- combn(c,2,simplify = FALSE) | |
| 161 ## condIds = map(comp,~which( coefficients %in% .x)) | |
| 162 ## L = rep(0,length(coefficients)) | |
| 163 ## L = sapply(condIds,function(x){L[x]=c(-1,1);L}) | |
| 164 ## rownames(L) = coefficients | |
| 165 ## colnames(L) = map_chr(comp, ~paste(.x,collapse = '-')) | |
| 166 condIds <- map(comp, ~which(coefficients %in% .x)) | |
| 167 L <- rep(0,nlevels(c)) | |
| 168 L <- sapply(comp,function(x){L[x]=c(-1,1);L}) | |
| 169 rownames(L) <- levels(c) | |
| 170 colnames(L) <- map_chr(comp, ~paste(rev(.x),collapse = '-')) | |
| 171 L | |
| 172 } else coefficients | |
| 173 } | |
| 174 | |
| 175 setGeneric ( | |
| 176 name= "getXLevels", | |
| 177 def=function(model,...){standardGeneric("getXLevels")} | |
| 178 ) | |
| 179 | |
| 180 .getXLevelsGlm = function(model) | |
| 181 map2(names(model$xlevels), model$xlevels, paste0) %>% unlist | |
| 182 | |
| 183 setMethod("getXLevels", "glm", .getXLevelsGlm) | |
| 184 | |
| 185 .getXLevelsMermod = function(model) | |
| 186 getME(model,"flist") %>% map(levels) %>% unlist %>% unname | |
| 187 | |
| 188 setMethod("getXLevels", "lmerMod", .getXLevelsMermod) | |
| 189 | |
| 190 contEst <- function(model, contrasts, var, df, lfc = 0){ | |
| 191 #TODO only contrast of random effect possible and not between fixed regression terms | |
| 192 betaB <- getBetaB(model) | |
| 193 vcov <- getVcovBetaBUnscaled(model) | |
| 194 coefficients <- names(betaB) | |
| 195 id <- coefficients %in% rownames(contrasts) | |
| 196 | |
| 197 coefficients <- coefficients[id] | |
| 198 vcov <- vcov[id,id] | |
| 199 betaB <- betaB[id] | |
| 200 | |
| 201 xlevels <- getXLevels(model) | |
| 202 id <- !apply(contrasts,2,function(x){any(x[!(rownames(contrasts) %in% xlevels)] !=0)}) | |
| 203 contrasts <- contrasts[coefficients, id, drop = FALSE] | |
| 204 ## If no contrasts could be found, terminate | |
| 205 if (is.null(colnames(contrasts))) return(new_tibble(list())) | |
| 206 | |
| 207 se <- sqrt(diag(t(contrasts)%*%vcov%*%contrasts)*var) | |
| 208 logFC <- (t(contrasts)%*%betaB)[,1] | |
| 209 | |
| 210 ### Addition to allow testing against another log FC (lfc) | |
| 211 ### See https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2654802/ | |
| 212 | |
| 213 lfc <- abs(lfc) | |
| 214 aest <- abs(logFC) | |
| 215 | |
| 216 Tval <- setNames(rep(0, length(logFC)),names(se)) | |
| 217 tstat.right <- (aest - lfc)/se | |
| 218 tstat.left <- (aest + lfc)/se | |
| 219 pval <- pt(tstat.right, df = df, lower.tail = FALSE) + | |
| 220 pt(tstat.left, df = df, lower.tail = FALSE) | |
| 221 tstat.right <- pmax(tstat.right, 0) | |
| 222 fc.up <- (logFC >= lfc) | |
| 223 fc.up[is.na(fc.up)] <- FALSE | |
| 224 fc.down <- (logFC < -lfc) | |
| 225 fc.down[is.na(fc.down)] <- FALSE | |
| 226 Tval[fc.up] <- tstat.right[fc.up] | |
| 227 Tval[fc.down] <- -tstat.right[fc.down] | |
| 228 Tval[is.na(logFC)] <- NA | |
| 229 | |
| 230 new_tibble(list(contrast = colnames(contrasts), | |
| 231 logFC = logFC, | |
| 232 se = se, | |
| 233 t = Tval, | |
| 234 df = rep(df, length(se)), | |
| 235 pvalue = pval)) | |
| 236 } | |
| 237 | |
| 238 do_lmerfit = function(df, form, nIter = 10, tol = 1e-6, control = lmerControl(calc.derivs = FALSE)){ | |
| 239 fit <- lmer(form, data = df, control = control) | |
| 240 ##Initialize SSE | |
| 241 res <- resid(fit) | |
| 242 ## sseOld=sum(res^2) | |
| 243 sseOld <- fit@devcomp$cmp['pwrss'] | |
| 244 while (nIter > 0){ | |
| 245 nIter = nIter-1 | |
| 246 fit@frame$`(weights)` <- MASS::psi.huber(res/(mad(res))) | |
| 247 fit <- refit(fit) | |
| 248 res <- resid(fit) | |
| 249 ## sse=sum(res^2) | |
| 250 sse <- fit@devcomp$cmp['pwrss'] | |
| 251 if(abs(sseOld-sse)/sseOld <= tol) break | |
| 252 sseOld <- sse | |
| 253 } | |
| 254 return(fit) | |
| 255 } | |
| 256 | |
| 257 calculate_df = function(df, model, vars){ | |
| 258 ## Get all the variables in the formula that are not defined in vars | |
| 259 form <- attributes(model@frame)$formula | |
| 260 vars_formula <- all.vars(form) | |
| 261 vars_drop <- vars_formula[!vars_formula %in% vars] | |
| 262 ## Sum of number of columns -1 of Zt mtrix of each random effect that does not involve a variable in vars_drop | |
| 263 mq <- getME(model,'q_i') | |
| 264 id <- !map_lgl(names(mq),~{any(stringr::str_detect(.x,vars_drop))}) | |
| 265 p <- sum(mq[id]) - sum(id) | |
| 266 ## Sum of fixed effect parameters that do not involve a variable in vars_drop | |
| 267 mx <- getME(model,'X') | |
| 268 id <- !map_lgl(colnames(mx),~{any(stringr::str_detect(.x,vars_drop))}) | |
| 269 p <- p + sum(id) | |
| 270 | |
| 271 ## n is number of sample because 1 protein defined per sample | |
| 272 n <- n_distinct(df$sample) | |
| 273 n-p | |
| 274 } | |
| 275 | |
| 276 do_mm = function(formulas, msnset, group_vars = feature,type_df = 'traceHat' | |
| 277 , contrasts = NULL, lfc = 0, p.adjust.method = "BH", max_iter = 20L | |
| 278 , squeeze_variance = TRUE | |
| 279 , control = lmerControl(calc.derivs = FALSE) | |
| 280 ## choose parallel = plan(sequential) if you don't want parallelisation | |
| 281 ## , parallel_plan = plan(cluster, workers = makeClusterPSOCK(availableCores())) | |
| 282 , parallel = TRUE, cores = availableCores() | |
| 283 ){ | |
| 284 if(!(type_df %in% c("conservative", "traceHat"))) | |
| 285 stop("Invalid input `type_df`.") | |
| 286 | |
| 287 system.time({## can take a while | |
| 288 if (parallel){ | |
| 289 cl <- makeClusterPSOCK(cores) | |
| 290 plan(cluster, workers = cl) | |
| 291 } else { | |
| 292 plan(sequential)} | |
| 293 | |
| 294 ## future::plan(parallel_plan,gc = TRUE) | |
| 295 formulas <- map(c(formulas), ~update(.,expression ~ . )) | |
| 296 group_vars <- enquo(group_vars) # group_var = quo(protein) | |
| 297 df <- MSnSet2df(msnset) | |
| 298 | |
| 299 ## Glm adds variable name to levels in catogorical (eg for contrast) | |
| 300 ## lme4 doesnt do this for random effect, so add beforehand | |
| 301 ## Ludger needs this for Hurdle | |
| 302 df = formulas %>% map(lme4:::findbars) %>% unlist %>% map_chr(all.vars) %>% unique %>% | |
| 303 purrr::reduce(~{mutate_at(.x,.y,funs(paste0(.y,.)))}, .init=df) | |
| 304 | |
| 305 cat("Fitting mixed models\n") | |
| 306 ## select only columns needed for fitting | |
| 307 df_prot <- df %>% | |
| 308 group_by_at(vars(!!group_vars)) %>% nest %>% | |
| 309 mutate(model = furrr::future_map(data,~{ | |
| 310 for (form in formulas){ | |
| 311 fit = try(do_lmerfit(.x, form, nIter = max_iter,control = control)) | |
| 312 if (class(fit) == "lmerMod") return(fit) | |
| 313 } | |
| 314 fit | |
| 315 })) | |
| 316 | |
| 317 ## Return also failed ones afterward | |
| 318 df_prot_failed <- filter(df_prot, map_lgl(model,~{class(.x) != "lmerMod"})) | |
| 319 df_prot <- filter(df_prot, map_lgl(model, ~{class(.x)=="lmerMod"})) | |
| 320 | |
| 321 if(nrow(df_prot) == 0) {print("No models could be fitted"); return(df_prot_failed)} | |
| 322 | |
| 323 df_prot <- | |
| 324 mutate(df_prot | |
| 325 , formula = map(model,~{attributes(.@frame)$formula}) | |
| 326 ## get trace hat df for squeezeVar | |
| 327 , df = map_dbl(model, ~getDf(.x)) | |
| 328 , sigma = map_dbl(model,~{MSqRob::getSigma(.x)})) %>% | |
| 329 ## Squeeze variance | |
| 330 bind_cols(as_data_frame(MSqRob::squeezeVarRob(.$sigma^2, .$df, robust = TRUE))) %>% | |
| 331 ## mutate(var_protein = ifelse(squeeze_variance,var.post,sigma^2), | |
| 332 mutate(var_protein = if (squeeze_variance) var.post else sigma^2, | |
| 333 df_post = df + df.prior | |
| 334 , df_protein = | |
| 335 if (type_df == "conservative") | |
| 336 ## Calculate df on protein level, assumption is that there is only one protein value/run, | |
| 337 map2_dbl(data, model,~calculate_df(.x,.y, vars = colnames(pData(msnset)))) | |
| 338 else if (type_df == "traceHat") | |
| 339 ## Alternative: MSqRob implementation with trace(Hat): | |
| 340 if(squeeze_variance) df_post else df | |
| 341 ) | |
| 342 | |
| 343 ## Calculate fold changes and p values for contrast | |
| 344 cat("Estimating p-values contrasts\n") | |
| 345 df_prot <- df_prot %>% | |
| 346 mutate(contrasts = furrr::future_pmap(list(model = model, contrasts = list(contrasts), | |
| 347 var = var_protein, | |
| 348 df = df_protein, lfc = lfc), contEst)) %>% | |
| 349 ## Calculate qvalues BH | |
| 350 select_at(vars(!!group_vars, contrasts)) %>% | |
| 351 unnest %>% | |
| 352 group_by(contrast) %>% | |
| 353 mutate(qvalue = p.adjust(pvalue, method = p.adjust.method)) %>% | |
| 354 group_by_at(vars(!!group_vars)) %>% nest(.key = contrasts) %>% | |
| 355 left_join(df_prot,.) | |
| 356 } | |
| 357 ) %>% print | |
| 358 if (parallel) stopCluster(cl) | |
| 359 bind_rows(df_prot,df_prot_failed) | |
| 360 } | |
| 361 | |
| 362 read_moff = function(moff,meta){ | |
| 363 print('START READING MOFF DATA') | |
| 364 set = readMSnSet2(moff, ecol = -c(1,2),fnames = 'peptide', | |
| 365 sep = '\t',stringsAsFactors = FALSE) | |
| 366 colnames(fData(set)) = c('peptide','protein') | |
| 367 pd = read_tsv(meta) %>% column_to_rownames('sample') %>% as.data.frame | |
| 368 | |
| 369 ## fix msnbase bug 1 | |
| 370 ## if there is only 1 sample. Msnbase doesn't name it | |
| 371 if (length(sampleNames(set) ==1)) | |
| 372 sampleNames(set) = rownames(pd) | |
| 373 | |
| 374 pData(set) = pd | |
| 375 ## fix msnbase bug 2 | |
| 376 ## bug in msnbase in summarisation (samplenames should be alphabetically) | |
| 377 sample_order = order(sampleNames(set)) | |
| 378 set = MSnSet(exprs(set)[,sample_order,drop = FALSE] | |
| 379 , fData = AnnotatedDataFrame(fData(set)) | |
| 380 , pData = AnnotatedDataFrame(pData(set)[sample_order,,drop = FALSE])) | |
| 381 print('END READING MOFF DATA') | |
| 382 set | |
| 383 } | |
| 384 | |
| 385 preprocess = function(set){ | |
| 386 print('START PREPROCESSING') | |
| 387 if (ncol(set) == 1){ | |
| 388 exprs(set)[0 == (exprs(set))] <- NA | |
| 389 set = log(set, base = 2) | |
| 390 ## keep smallest unique groups | |
| 391 groups2 <- smallestUniqueGroups(fData(set)$protein,split = ',') | |
| 392 sel <- fData(set)$protein %in% groups2 | |
| 393 set <- set[sel,] | |
| 394 } else { | |
| 395 ## normalisation | |
| 396 exprs(set)[0 == (exprs(set))] <- NA | |
| 397 set <- normalize(set, 'vsn') | |
| 398 ## keep smallest unique groups | |
| 399 groups2 <- smallestUniqueGroups(fData(set)$protein,split = ',') | |
| 400 sel <- fData(set)$protein %in% groups2 | |
| 401 set <- set[sel,] | |
| 402 ## remove peptides with less then 2 observations | |
| 403 sel <- rowSums(!is.na(exprs(set))) >= 2 | |
| 404 set <- set[sel] | |
| 405 } | |
| 406 print('END PREPROCESSING') | |
| 407 set | |
| 408 } | |
| 409 | |
| 410 summarise = function(set){ | |
| 411 print('START SUMMARISATION') | |
| 412 ## Summarisation | |
| 413 if (ncol(set) == 1){ | |
| 414 set = combineFeatures(set,fun="median", groupBy = fData(set)$protein,cv = FALSE) | |
| 415 } else { | |
| 416 ## set = combineFeatures(set,fun="robust", groupBy = fData(set)$protein,cv = FALSE) | |
| 417 set = do_robust_summaristion(set,protein) | |
| 418 } | |
| 419 exprs(set) %>% as.data.frame %>% rownames_to_column('protein') %>% write_tsv('summarised_proteins.tsv') | |
| 420 print('END SUMMARISATION') | |
| 421 set | |
| 422 } | |
| 423 | |
| 424 quantify = function(set, cpu = 0){ | |
| 425 print('START QUANTITATION') | |
| 426 if ((cpu == 0) | is.na(cpu)) cpu = availableCores() | |
| 427 print(cpu) | |
| 428 form = colnames(pData(set)) %>% paste0('(1|',.,')',collapse='+') %>% paste('~',.) %>% as.formula | |
| 429 contrasts <- contrast_helper(form, set, condition) | |
| 430 res = do_mm(formulas = form, set, group_vars = c(protein) | |
| 431 , contrasts = contrasts,type_df = 'traceHat', parallel = TRUE,cores = cpu) %>% | |
| 432 filter(!map_lgl(contrasts, is.null)) %>% | |
| 433 transmute(protein, contrasts) %>% unnest %>% | |
| 434 transmute(protein | |
| 435 , comparison = str_replace_all(contrast, 'condition', '') | |
| 436 , logFC,pvalue,qvalue) %>% | |
| 437 write_tsv('quantitation.tsv') | |
| 438 print('END QUANTITATION') | |
| 439 } | |
| 440 | |
| 441 args = commandArgs(trailingOnly=TRUE) | |
| 442 moff = args[1] | |
| 443 meta = args[2] | |
| 444 summarise_only = args[3] | |
| 445 cpu = strtoi(args[4]) | |
| 446 | |
| 447 res = read_moff(moff, meta) %>% | |
| 448 preprocess %>% | |
| 449 summarise | |
| 450 if (summarise_only != 1) | |
| 451 quantify(res, cpu) | |
| 452 |
