Mercurial > repos > galaxyp > mqppep_anova
comparison mqppep_anova_script.Rmd @ 3:6f51e262d84d draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3dcf0d08f006b888061ff83eadc65e550d751869
| author | galaxyp |
|---|---|
| date | Tue, 31 Jan 2023 22:26:38 +0000 |
| parents | 1023fa7bd75f |
| children | 0ab632432798 |
comparison
equal
deleted
inserted
replaced
| 2:1023fa7bd75f | 3:6f51e262d84d |
|---|---|
| 46 meanPercentile: 50 | 46 meanPercentile: 50 |
| 47 #meanPercentile: 1 | 47 #meanPercentile: 1 |
| 48 # for small random value imputation, what should `s / mean(x)` ratio be? | 48 # for small random value imputation, what should `s / mean(x)` ratio be? |
| 49 sdPercentile: 1.0 | 49 sdPercentile: 1.0 |
| 50 # output path for imputed data file | 50 # output path for imputed data file |
| 51 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" | 51 imputedDataFilename: "test-data/imputedDataFilename.txt" |
| 52 # output path for imputed/quantile-normalized/log-transformed data file | 52 # output path for imputed/quantile-normalized/log-transformed data file |
| 53 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" | 53 imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt" |
| 54 # output path for contents of `stats_metadata_v` table | 54 # output path for contents of `stats_metadata_v` table |
| 55 anovaKseaMetadata: "test-data/limbo/anovaKseaMetadata.txt" | 55 anovaKseaMetadata: "test-data/anovaKseaMetadata.txt" |
| 56 # how to test one variable with > 2 categories (e.g., aov or kruskal.test) | 56 # how to test one variable with > 2 categories (e.g., aov or kruskal.test) |
| 57 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] | 57 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] |
| 58 # how to test one variable with 2 categories (e.g., oneway.test) | 58 # how to test one variable with 2 categories (e.g., oneway.test) |
| 59 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] | 59 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] |
| 60 # what should be the minimum quality for consideration in both | 60 # what should be the minimum quality for consideration in both |
| 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" | 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" |
| 98 # should debugging trace messages be printed? | 98 # should debugging trace messages be printed? |
| 99 showEnrichedSubstrates: FALSE | 99 showEnrichedSubstrates: FALSE |
| 100 # should debugging nb/nbe messages be printed? | 100 # should debugging nb/nbe messages be printed? |
| 101 printNBMsgs: FALSE | 101 printNBMsgs: FALSE |
| 102 # showld row-scaling be applied to heatmaps: "none" or "row" | 102 #printNBMsgs: TRUE |
| 103 defaultHeatMapRowScaling: "none" | 103 # should row-scaling be applied to heatmaps: "none" or "row" |
| 104 defaultHeatMapRowScaling: !r c("none", "row")[1] | |
| 105 # how missing values be displayed on heat maps: "NA" or " " | |
| 106 heatMapNAcellNote: !r c(" ", "NA")[1] | |
| 107 # how missing values be displayed on heat maps: "NA" or " " | |
| 108 heatMapNAgrey: "#D8D8D8" | |
| 109 # temporary hack | |
| 110 heatMapNAsubstitute: TRUE | |
| 111 # what color should be used for missing values be displayed on heat maps | |
| 112 heatMapNAcellColor: "grey15" | |
| 104 # should debugging trace messages be printed? | 113 # should debugging trace messages be printed? |
| 105 printTraceMsgs: FALSE | 114 printTraceMsgs: FALSE |
| 106 # when debugging files are needed, set debugFileBasePath to the path | 115 # when debugging files are needed, set debugFileBasePath to the path |
| 107 # to the directory where they should be written | 116 # to the directory where they should be written |
| 108 debugFileBasePath: !r if (TRUE) NULL else "test-data" | 117 debugFileBasePath: !r if (TRUE) NULL else "test-data" |
| 122 nbe <- if (!print_nb_messages) { | 131 nbe <- if (!print_nb_messages) { |
| 123 function(...) invisible() | 132 function(...) invisible() |
| 124 } else { | 133 } else { |
| 125 function(..., f = cat, file = stderr()) { | 134 function(..., f = cat, file = stderr()) { |
| 126 cat( | 135 cat( |
| 127 stringi::stri_unescape_unicode("\nNBE \\u2203\\u2283\\u2200"), | 136 stringi::stri_unescape_unicode("\nN.B. \\u2203\\u2283\\u2200"), |
| 128 ..., | 137 ..., |
| 129 file = file | 138 file = file |
| 130 ) | 139 ) |
| 131 } | 140 } |
| 132 } | 141 } |
| 146 | 155 |
| 147 if (print_nb_messages) nbe("library(gplots)") | 156 if (print_nb_messages) nbe("library(gplots)") |
| 148 library(gplots) | 157 library(gplots) |
| 149 if (print_nb_messages) nbe("library(caret)") | 158 if (print_nb_messages) nbe("library(caret)") |
| 150 # load caret for nearZeroVar | 159 # load caret for nearZeroVar |
| 151 if (print_nb_messages) nbe("Please ignore the messages about systemd, if any.\n") | 160 if (print_nb_messages) nbe("Please ignore any messages about systemd.\n") |
| 152 library(caret) | 161 library(caret) |
| 153 if (print_nb_messages) nbe("library(DBI)") | 162 if (print_nb_messages) nbe("library(DBI)") |
| 154 library(DBI) | 163 library(DBI) |
| 155 if (print_nb_messages) nbe("library(RSQLite)") | 164 if (print_nb_messages) nbe("library(RSQLite)") |
| 156 library(RSQLite) | 165 library(RSQLite) |
| 340 msg, "\n" | 349 msg, "\n" |
| 341 ) | 350 ) |
| 342 } | 351 } |
| 343 } | 352 } |
| 344 | 353 |
| 354 divert_warnings <- | |
| 355 function(expr, classes = "warning") { | |
| 356 withCallingHandlers( | |
| 357 expr, | |
| 358 warning = function(w) { | |
| 359 cat(" divert_warnings: ", w$message, "\n", file = stderr()) | |
| 360 if (inherits(w, classes)) | |
| 361 tryInvokeRestart("muffleWarning") | |
| 362 } | |
| 363 ) | |
| 364 } | |
| 365 | |
| 345 # ref: https://tug.org/texinfohtml/latex2e.html | 366 # ref: https://tug.org/texinfohtml/latex2e.html |
| 346 # LaTeX sets aside the following characters for special purposes. | 367 # LaTeX sets aside the following characters for special purposes. |
| 347 # For example, the percent sign % is for comments. | 368 # For example, the percent sign % is for comments. |
| 348 # They are called reserved characters or special characters. | 369 # They are called reserved characters or special characters. |
| 349 # They are all discussed elsewhere in this manual. | 370 # They are all discussed elsewhere in this manual. |
| 358 # As to the last three characters, to get a tilde in the text body font, | 379 # As to the last three characters, to get a tilde in the text body font, |
| 359 # use \~{} (omitting the curly braces would result in the next character | 380 # use \~{} (omitting the curly braces would result in the next character |
| 360 # receiving a tilde accent). | 381 # receiving a tilde accent). |
| 361 # Similarly, to get a text body font circumflex use \^{}. | 382 # Similarly, to get a text body font circumflex use \^{}. |
| 362 # To get a backslash in the font of the text body enter \textbackslash{}. | 383 # To get a backslash in the font of the text body enter \textbackslash{}. |
| 363 whack_math <- | 384 whack_math <- if (TRUE) { |
| 364 function(v) { | 385 function(v) { |
| 365 v <- as.character(v) | 386 v <- as.character(v) |
| 387 w <- v | |
| 366 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) | 388 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) |
| 367 w <- Reduce( | 389 w <- Reduce( |
| 368 f = function(l, r) { | 390 f = function(l, r) { |
| 369 gsub(r, paste0("\\", r), l, fixed = TRUE) | 391 ptrn <- paste0("\\", r) |
| 392 for (i in seq_along(l)) { | |
| 393 if (!grepl(ptrn, l[i], fixed = TRUE)) { | |
| 394 l[i] <- gsub(r, ptrn, l[i], fixed = TRUE) | |
| 395 } | |
| 396 } | |
| 397 l | |
| 370 }, | 398 }, |
| 371 x = c("#", "$", "%", "&", "{", "}", "_"), | 399 x = c("#", "$", "%", "&", "{", "}", "_"), |
| 372 init = w | 400 init = w |
| 373 ) | 401 ) |
| 374 w <- gsub("^", "\\^{}", w, fixed = TRUE) | 402 w <- gsub("^", "\\^{}", w, fixed = TRUE) |
| 403 w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE) | |
| 404 return(w) | |
| 405 } | |
| 406 } else { | |
| 407 function(v) { | |
| 408 v <- as.character(v) | |
| 409 w <- v | |
| 410 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) | |
| 411 w <- Reduce( | |
| 412 f = function(l, r) { | |
| 413 if (length(l) > 1) { | |
| 414 cat("whack_math: deparse1(v) = `", deparse1(v), "`\n", | |
| 415 file = stderr()) | |
| 416 cat("whack_math: v = `", v, "`\n", file = stderr()) | |
| 417 cat(" Reduce f: l = `", l, | |
| 418 "` r = `", r, | |
| 419 "`\n", file = stderr() | |
| 420 ) | |
| 421 divert_warnings(grepl(r, l, fixed = TRUE)) | |
| 422 } | |
| 423 ptrn <- paste0("\\", r) | |
| 424 cat("ptrn: `", ptrn, "`\n", file = stderr()) | |
| 425 for (i in seq_along(l)) { | |
| 426 cat(" before l[i] = ", l[i], "\n", file = stderr()) | |
| 427 if (!grepl(ptrn, l[i], fixed = TRUE)) { | |
| 428 l[i] <- gsub(r, ptrn, l[i], fixed = TRUE) | |
| 429 } | |
| 430 cat(" after l[i] = ", l[i], "\n", file = stderr()) | |
| 431 } | |
| 432 l | |
| 433 }, | |
| 434 x = c("#", "$", "%", "&", "{", "}", "_"), | |
| 435 init = w | |
| 436 ) | |
| 437 w <- gsub("^", "\\^{}", w, fixed = TRUE) | |
| 438 w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE) | |
| 375 return(w) | 439 return(w) |
| 376 if (all(v == w)) | 440 if (all(v == w)) |
| 377 v | 441 v |
| 378 else | 442 else |
| 379 paste0("\\texttt{", w, "}") | 443 paste0("\\texttt{", w, "}") |
| 380 } | 444 } |
| 445 } | |
| 381 whack_underscores <- whack_math | 446 whack_underscores <- whack_math |
| 382 | 447 |
| 383 ## dump params to stderr (remove this eventually) | 448 ## dump params to stderr (remove this eventually) |
| 384 | 449 |
| 385 if (FALSE) nbe(see_variable(params)) | 450 if (FALSE) nbe(see_variable(params)) |
| 578 g_intensity_min_per_class <- params$intensityMinValuesPerGroup | 643 g_intensity_min_per_class <- params$intensityMinValuesPerGroup |
| 579 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) { | 644 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) { |
| 580 cat("invalid intensityMinValuesPerGroup (must be integer > -1") | 645 cat("invalid intensityMinValuesPerGroup (must be integer > -1") |
| 581 knitr::knit_exit() | 646 knitr::knit_exit() |
| 582 } | 647 } |
| 583 | 648 g_correlate_substrates <- params$correlateSubstrates |
| 584 if (is.na(as.logical(g_correlate_substrates <- params$correlateSubstrates))) { | 649 if (is.na(as.logical(g_correlate_substrates))) { |
| 585 cat("invalid correlateSubstrates (must be TRUE or FALSE)") | 650 cat("invalid correlateSubstrates (must be TRUE or FALSE)") |
| 586 knitr::knit_exit() | 651 knitr::knit_exit() |
| 587 } | 652 } |
| 588 | 653 g_filter_cov_var_gt_1 <- params$filterCovVarGT1 |
| 589 if (is.na(as.logical(g_filter_cov_var_gt_1 <- params$filterCovVarGT1))) { | 654 if (is.na(as.logical(g_filter_cov_var_gt_1))) { |
| 590 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)") | 655 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)") |
| 591 knitr::knit_exit() | 656 knitr::knit_exit() |
| 592 } | 657 } |
| 593 | 658 |
| 594 # TODO Validate >> 0 < 30 | 659 # TODO Validate >> 0 < 30 |
| 617 value = rvalue, | 682 value = rvalue, |
| 618 pos = pf | 683 pos = pf |
| 619 ) | 684 ) |
| 620 invisible(rvalue) | 685 invisible(rvalue) |
| 621 } | 686 } |
| 687 # Infix concatenation | |
| 688 `%||%` <- function(lvalue, rvalue) paste0(lvalue, rvalue) | |
| 689 | |
| 622 | 690 |
| 623 ### FUNCTIONS | 691 ### FUNCTIONS |
| 624 | 692 |
| 625 no_op <- | 693 no_op <- |
| 626 function() { | 694 function() { |
| 703 }, | 771 }, |
| 704 simplify = "array" | 772 simplify = "array" |
| 705 ) | 773 ) |
| 706 grpl_rslt <- grpl_rslt + grpl_rslt_v | 774 grpl_rslt <- grpl_rslt + grpl_rslt_v |
| 707 } | 775 } |
| 708 rslt <- unname(grpl_rslt > 0) | 776 unname(grpl_rslt > 0) |
| 709 } | 777 } |
| 710 | 778 |
| 711 ##' Produce positions in a vector where succeeding value != current valus | 779 ##' Produce positions in a vector where succeeding value != current valus |
| 712 ##' | 780 ##' |
| 713 ##' @title Search vector for neighboring positions having different values | 781 ##' @title Search vector for neighboring positions having different values |
| 826 pvalue | 894 pvalue |
| 827 } | 895 } |
| 828 | 896 |
| 829 # This code adapted from matrixcalc::is.positive.definite | 897 # This code adapted from matrixcalc::is.positive.definite |
| 830 # Notably, this simply tests without calling stop() | 898 # Notably, this simply tests without calling stop() |
| 899 # nolint start | |
| 900 # squash un-actionable cyclomatic_complexity warning | |
| 831 is_positive_definite <- function(x, tol = 1e-08) { | 901 is_positive_definite <- function(x, tol = 1e-08) { |
| 902 # nolint end | |
| 832 if (!is.matrix(x)) | 903 if (!is.matrix(x)) |
| 833 return(FALSE) | 904 return(FALSE) |
| 834 if (!is.numeric(x)) | 905 if (!is.numeric(x)) |
| 835 return(FALSE) | 906 return(FALSE) |
| 836 if (nrow(x) < 1) | 907 if (nrow(x) < 1) |
| 970 } | 1041 } |
| 971 invisible(x) | 1042 invisible(x) |
| 972 } | 1043 } |
| 973 | 1044 |
| 974 # Use this like print.data.frame, from which it is adapted: | 1045 # Use this like print.data.frame, from which it is adapted: |
| 1046 # nolint start | |
| 1047 # squash un-actionable cyclomatic_complexity warning | |
| 1048 # squash un-actionable "no visible global for ..." | |
| 975 data_frame_tabbing_latex <- | 1049 data_frame_tabbing_latex <- |
| 976 function( | 1050 function( |
| 1051 # nolint end | |
| 977 x, | 1052 x, |
| 978 # vector of tab stops, in inches | 1053 # vector of tab stops, in inches |
| 979 tabstops, | 1054 tabstops, |
| 980 # vector of headings, registered with tab-stops | 1055 # vector of headings, registered with tab-stops |
| 981 headings = colnames(x), | 1056 headings = colnames(x), |
| 1149 return(s) | 1224 return(s) |
| 1150 } | 1225 } |
| 1151 nolatex_verbatim <- | 1226 nolatex_verbatim <- |
| 1152 function(expr) eval(expr) | 1227 function(expr) eval(expr) |
| 1153 | 1228 |
| 1229 error_knitrexit_stop <- | |
| 1230 function(e) { | |
| 1231 cat("Caught error:\n\n") | |
| 1232 str(e) | |
| 1233 knitr::knit_exit() | |
| 1234 stop(e) | |
| 1235 } | |
| 1236 | |
| 1154 latex_verbatim <- | 1237 latex_verbatim <- |
| 1155 function(expr) { | 1238 function(expr) { |
| 1156 arg_string <- deparse1(substitute(expr)) | |
| 1157 cat("\n\\begin{verbatim}\n___\n") | 1239 cat("\n\\begin{verbatim}\n___\n") |
| 1158 tryCatch( | 1240 tryCatch( |
| 1159 expr = expr, | 1241 expr = expr, |
| 1160 error = param_df_exit, | 1242 error = param_df_exit, |
| 1161 #ACE error = | |
| 1162 #ACE function(e) { | |
| 1163 #ACE cat("Caught error:\n\n") | |
| 1164 #ACE str(e) | |
| 1165 #ACE knitr::knit_exit() | |
| 1166 #ACE stop(e) | |
| 1167 #ACE }, | |
| 1168 finally = cat("...\n\\end{verbatim}\n") | 1243 finally = cat("...\n\\end{verbatim}\n") |
| 1169 ) | 1244 ) |
| 1170 } | 1245 } |
| 1171 | 1246 |
| 1172 latex_samepage <- | 1247 latex_samepage <- |
| 1173 function(expr) { | 1248 function(expr) { |
| 1174 arg_string <- deparse1(substitute(expr)) | |
| 1175 cat("\n\\begin{samepage}\n") | 1249 cat("\n\\begin{samepage}\n") |
| 1176 tryCatch( | 1250 tryCatch( |
| 1177 expr = expr, | 1251 expr = expr, |
| 1178 error = param_df_exit, | 1252 error = param_df_exit, |
| 1179 #ACE error = | |
| 1180 #ACE function(e) { | |
| 1181 #ACE cat("Caught error:\n\n") | |
| 1182 #ACE str(e) | |
| 1183 #ACE knitr::knit_exit() | |
| 1184 #ACE stop(e) | |
| 1185 #ACE }, | |
| 1186 finally = cat("\n\\end{samepage}\n") | 1253 finally = cat("\n\\end{samepage}\n") |
| 1187 ) | 1254 ) |
| 1188 } | 1255 } |
| 1189 | 1256 |
| 1190 # return the result of invocation after showing parameters | 1257 # return the result of invocation after showing parameters |
| 1191 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ | 1258 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ |
| 1192 latex_show_invocation <- | 1259 latex_show_invocation <- |
| 1193 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) { | 1260 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) { |
| 1194 function(...) { | 1261 function(...) { |
| 1195 my_env <- (as.list(environment())) | |
| 1196 va <- list(...) | 1262 va <- list(...) |
| 1197 my_rslt <- new_env() | 1263 my_rslt <- new_env() |
| 1198 my_rslt$rslt <- NULL | 1264 my_rslt$rslt <- NULL |
| 1199 latex_verbatim( | 1265 latex_verbatim( |
| 1200 expr = { | 1266 expr = { |
| 1201 cat(sprintf("\n .. Local variables for '%s':\n\n", f_name)) | 1267 cat(sprintf("\n .. Local variables for '%s':\n\n", f_name)) |
| 1202 str(va) | 1268 str(va) |
| 1203 if (!head_patch) { | 1269 if (!head_patch) { |
| 1204 # return this result | 1270 # return this result |
| 1205 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ | 1271 # ref: |
| 1272 # https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ | |
| 1206 cat(sprintf("\n .. Invoking '%s'\n", f_name)) | 1273 cat(sprintf("\n .. Invoking '%s'\n", f_name)) |
| 1207 tryCatch( | 1274 tryCatch( |
| 1208 { | 1275 { |
| 1209 cat("\n\\end{verbatim}\n") | 1276 cat("\n\\end{verbatim}\n") |
| 1210 rslt <- do.call(f, va) | 1277 rslt <- do.call(f, va) |
| 1211 }, | 1278 }, |
| 1212 error = param_df_exit, | 1279 error = param_df_exit, |
| 1213 #ACE error = function(e) { | |
| 1214 #ACE cat("\n\\begin{verbatim}\n") | |
| 1215 #ACE str(e) | |
| 1216 #ACE cat("\n\\end{verbatim}\n") | |
| 1217 #ACE knitr::knit_exit() | |
| 1218 #ACE stop(e) | |
| 1219 #ACE }, | |
| 1220 finally = cat("\n\\begin{verbatim}\n") | 1280 finally = cat("\n\\begin{verbatim}\n") |
| 1221 ) | 1281 ) |
| 1222 cat(sprintf("\n .. '%s' returned:\n", f_name)) | 1282 cat(sprintf("\n .. '%s' returned:\n", f_name)) |
| 1223 str(rslt) | 1283 str(rslt) |
| 1224 my_rslt$rslt <- rslt | 1284 my_rslt$rslt <- rslt |
| 1246 collapse = collapse_string | 1306 collapse = collapse_string |
| 1247 ) | 1307 ) |
| 1248 ) | 1308 ) |
| 1249 } | 1309 } |
| 1250 | 1310 |
| 1251 latex_itemized_collapsed <- function(collapse_string, v, underscore_whack = TRUE) { | 1311 latex_itemized_collapsed <- |
| 1312 function(collapse_string, v, underscore_whack = TRUE) { | |
| 1252 cat("\\begin{itemize}\n\\item ") | 1313 cat("\\begin{itemize}\n\\item ") |
| 1253 latex_collapsed_vector(collapse_string, v, underscore_whack) | 1314 latex_collapsed_vector(collapse_string, v, underscore_whack) |
| 1254 cat("\n\\end{itemize}\n") | 1315 cat("\n\\end{itemize}\n") |
| 1255 } | 1316 } |
| 1256 | 1317 |
| 1257 latex_itemized_list <- function(v, underscore_whack = TRUE) { | 1318 latex_itemized_list <- function(v, underscore_whack = TRUE) { |
| 1258 latex_itemized_collapsed("\n\\item ", v, underscore_whack) | 1319 latex_itemized_collapsed("\n\\item ", v, underscore_whack) |
| 1259 } | 1320 } |
| 1260 | 1321 |
| 1261 latex_enumerated_collapsed <- function(collapse_string, v, underscore_whack = TRUE) { | 1322 latex_enumerated_collapsed <- |
| 1323 function(collapse_string, v, underscore_whack = TRUE) { | |
| 1262 cat("\\begin{enumerate}\n\\item ") | 1324 cat("\\begin{enumerate}\n\\item ") |
| 1263 latex_collapsed_vector(collapse_string, v, underscore_whack) | 1325 latex_collapsed_vector(collapse_string, v, underscore_whack) |
| 1264 cat("\n\\end{enumerate}\n") | 1326 cat("\n\\end{enumerate}\n") |
| 1265 } | 1327 } |
| 1266 | 1328 |
| 1300 } | 1362 } |
| 1301 | 1363 |
| 1302 # N.B. use con = "" to emulate regular cat | 1364 # N.B. use con = "" to emulate regular cat |
| 1303 fcat0 <- | 1365 fcat0 <- |
| 1304 function(..., sprtr = " ", cnnctn = file()) { | 1366 function(..., sprtr = " ", cnnctn = file()) { |
| 1305 cat0(..., sep = sprtr, file = cnnctn) | 1367 gsubfn::cat0(..., sep = sprtr, file = cnnctn) |
| 1306 invisible(cnnctn) | 1368 invisible(cnnctn) |
| 1307 } | 1369 } |
| 1308 | 1370 |
| 1309 hypersub <- | 1371 hypersub <- |
| 1310 function(s) { | 1372 function(s) { |
| 1426 #' | 1488 #' |
| 1427 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20 | 1489 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20 |
| 1428 #' | 1490 #' |
| 1429 #' Horn et al. (2014) Nature Methods 11(6):603-4 | 1491 #' Horn et al. (2014) Nature Methods 11(6):603-4 |
| 1430 #' | 1492 #' |
| 1493 # nolint start | |
| 1494 # squash un-actionable cyclomatic_complexity warning | |
| 1431 ksea_scores <- function( | 1495 ksea_scores <- function( |
| 1496 # nolint end | |
| 1432 # For human data, typically, ksdata = KSEAapp::ksdata | 1497 # For human data, typically, ksdata = KSEAapp::ksdata |
| 1433 ksdata, | 1498 ksdata, |
| 1434 | 1499 |
| 1435 # Input data file having columns: | 1500 # Input data file having columns: |
| 1436 # - Protein : abbreviated protein name | 1501 # - Protein : abbreviated protein name |
| 1454 | 1519 |
| 1455 # Minimum substrate count, necessary to adjust the p-value appropriately. | 1520 # Minimum substrate count, necessary to adjust the p-value appropriately. |
| 1456 minimum_substrate_count | 1521 minimum_substrate_count |
| 1457 | 1522 |
| 1458 ) { | 1523 ) { |
| 1524 if (FALSE) { | |
| 1525 if (print_nb_messages) nbe("Output contents of `ksdata` table\n") | |
| 1526 cat_variable(ksdata) | |
| 1527 cat("\n\\clearpage\n") | |
| 1528 data_frame_tabbing_latex( | |
| 1529 x = ksdata[order(ksdata$GENE, ksdata$SUB_GENE), ], | |
| 1530 tabstops = c(0.3, 0.3, 1.2, 0.3, 0.3, 0.3, 0.3, | |
| 1531 0.8, 0.3, 0.8, 0.3, 0.3, 0.3), | |
| 1532 caption = "ksdata dump", | |
| 1533 use_subsubsection_header = FALSE | |
| 1534 ) | |
| 1535 } | |
| 1459 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution. | 1536 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution. |
| 1460 if (length(grep(";", px$Residue.Both)) == 0) { | 1537 if (length(grep(";", px$Residue.Both)) == 0) { |
| 1461 # There are no Residue.Both entries having semicolons, so new is | 1538 # There are no Residue.Both entries having semicolons, so new is |
| 1462 # simply px except two columns are renamed and a column is added | 1539 # simply px except two columns are renamed and a column is added |
| 1463 # for log2(abs(fold-change)) | 1540 # for log2(abs(fold-change)) |
| 1507 # To take the magnitude into account without taking the direction into | 1584 # To take the magnitude into account without taking the direction into |
| 1508 # account, set params$kseaUseAbsoluteLog2FC to TRUE | 1585 # account, set params$kseaUseAbsoluteLog2FC to TRUE |
| 1509 # | 1586 # |
| 1510 # Should KSEA be performed aggregating signed log2FC or absolute? | 1587 # Should KSEA be performed aggregating signed log2FC or absolute? |
| 1511 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores | 1588 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores |
| 1589 # nolint start | |
| 1590 # squash un-actionable "no visible global for ..." | |
| 1512 if (params$kseaUseAbsoluteLog2FC) { | 1591 if (params$kseaUseAbsoluteLog2FC) { |
| 1592 # nolint end | |
| 1513 # TRUE use abs(log2FC) for KSEA as Justin requested; this is a | 1593 # TRUE use abs(log2FC) for KSEA as Justin requested; this is a |
| 1514 # justifiable deviation from the KSEAapp::KSEA.Scores algorithm. | 1594 # justifiable deviation from the KSEAapp::KSEA.Scores algorithm. |
| 1515 new$log2_fc <- abs(new$log2_fc) | 1595 new$log2_fc <- abs(new$log2_fc) |
| 1516 } | 1596 } |
| 1517 | 1597 |
| 1520 # set to TRUE to monitor filtration on stderr | 1600 # set to TRUE to monitor filtration on stderr |
| 1521 sink(stderr()) | 1601 sink(stderr()) |
| 1522 cat(see_variable(networkin, "\n")) | 1602 cat(see_variable(networkin, "\n")) |
| 1523 } | 1603 } |
| 1524 ksdata_filtered <- | 1604 ksdata_filtered <- |
| 1525 sqldf( | 1605 sqldf::sqldf( |
| 1526 sprintf("%s %s", | 1606 sprintf("%s %s", |
| 1527 "select * from ksdata where not Source = 'NetworKIN'", | 1607 "select * from ksdata where not Source = 'NetworKIN'", |
| 1528 if (networkin) | 1608 if (networkin) |
| 1529 sprintf("or networkin_score >= %d", networkin_cutoff) | 1609 sprintf("or networkin_score >= %d", networkin_cutoff) |
| 1530 else | 1610 else |
| 1531 "" | 1611 "" |
| 1532 ) | 1612 ) |
| 1533 ) | 1613 ) |
| 1614 if (FALSE) write.csv(x = ksdata_filtered, file = "ksdata_filtered.csv") | |
| 1534 if (monitor_filtration_on_stderr) { | 1615 if (monitor_filtration_on_stderr) { |
| 1535 cat(see_variable(sqldf( | 1616 cat(see_variable(sqldf::sqldf( |
| 1536 "select count(*), Source from ksdata group by Source"), "\n")) | 1617 "select count(*), Source from ksdata group by Source"), "\n")) |
| 1537 cat(see_variable(sqldf( | 1618 cat(see_variable(sqldf::sqldf( |
| 1538 "select count(*), Source from ksdata_filtered group by Source"), "\n")) | 1619 "select count(*), Source from ksdata_filtered group by Source"), "\n")) |
| 1539 sink() | 1620 sink() |
| 1540 } | 1621 } |
| 1541 | 1622 |
| 1542 ############################################################################ | 1623 ############################################################################ |
| 1602 ksdata_dataset_abbrev$Substrate.Gene, | 1683 ksdata_dataset_abbrev$Substrate.Gene, |
| 1603 ksdata_dataset_abbrev$Substrate.Mod, | 1684 ksdata_dataset_abbrev$Substrate.Mod, |
| 1604 ksdata_dataset_abbrev$p), | 1685 ksdata_dataset_abbrev$p), |
| 1605 ] | 1686 ] |
| 1606 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev)) | 1687 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev)) |
| 1688 if (FALSE) write.csv( | |
| 1689 x = ksdata_dataset_abbrev, | |
| 1690 file = "ksdata_dataset_abbrev_no_aggregation.csv", | |
| 1691 row.names = FALSE | |
| 1692 ) | |
| 1693 | |
| 1694 # For some kinases, the only difference between kinase names in two databases | |
| 1695 # is the case of the the letters; dedup to eliminate this false distinction | |
| 1696 # The only imperfection with this approach is when the NetworKIN-only fliter | |
| 1697 # is in force and there is a duplication between NetworKIN and HPRD (which | |
| 1698 # outranks NetworKIN). Presently, the script does not invoke with this | |
| 1699 # function with the NetworKIN-only filter active. | |
| 1700 dedup_sql <- " | |
| 1701 select | |
| 1702 r.'Kinase.Gene', r.'Substrate.Gene', r.'Substrate.Mod', | |
| 1703 r.Peptide, r.p, r.FC, r.log2FC, r.Source | |
| 1704 from | |
| 1705 ( select | |
| 1706 rowid, | |
| 1707 rank() over | |
| 1708 (partition by | |
| 1709 lower(k.'Kinase.Gene'), k.p, k.FC | |
| 1710 order by | |
| 1711 k.Source | |
| 1712 ) as rank, | |
| 1713 * | |
| 1714 from | |
| 1715 ksdata_dataset_abbrev k | |
| 1716 ) as r | |
| 1717 where r.rank = 1 | |
| 1718 " | |
| 1719 ksdata_dataset_abbrev <- sqldf::sqldf(x = dedup_sql) | |
| 1720 if (FALSE) write.csv( | |
| 1721 x = ksdata_dataset_abbrev, | |
| 1722 file = "ksdata_dataset_abbrev_no_aggregation_dedup.csv", | |
| 1723 row.names = FALSE | |
| 1724 ) | |
| 1725 | |
| 1607 # First aggregation step to account for multiply phosphorylated peptides | 1726 # First aggregation step to account for multiply phosphorylated peptides |
| 1608 # and differing peptide sequences; the goal here is to combine results | 1727 # and differing peptide sequences; the goal here is to combine results |
| 1609 # for all measurements of the same substrate. | 1728 # for all measurements of the same substrate. |
| 1610 # SELECT `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`, | 1729 # SELECT `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`, |
| 1611 # `Source`, avg(log2FC) AS log2FC | 1730 # `Source`, avg(log2FC) AS log2FC |
| 1654 log2FC ~ Kinase.Gene, | 1773 log2FC ~ Kinase.Gene, |
| 1655 data = ksdata_dataset_abbrev, | 1774 data = ksdata_dataset_abbrev, |
| 1656 FUN = mean | 1775 FUN = mean |
| 1657 ) | 1776 ) |
| 1658 if (print_nb_messages) nbe(see_variable(mean_fc), "\n") | 1777 if (print_nb_messages) nbe(see_variable(mean_fc), "\n") |
| 1778 if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv") | |
| 1659 | 1779 |
| 1660 # for contrast j | 1780 # for contrast j |
| 1661 # for each kinase i | 1781 # for each kinase i |
| 1662 # extract log2 of fold-change (from `new` above) | 1782 # extract log2 of fold-change (from `new` above) |
| 1663 # (used in KSEA.Scores.R lines # 130 & 132) | 1783 # (used in KSEA.Scores.R lines # 130 & 132) |
| 1679 mean(log2_fc_j_each_i, na.rm = TRUE) | 1799 mean(log2_fc_j_each_i, na.rm = TRUE) |
| 1680 | 1800 |
| 1681 # Reorder mean_fc, although I don't see why | 1801 # Reorder mean_fc, although I don't see why |
| 1682 # (KSEA.Scores.R line 128 | 1802 # (KSEA.Scores.R line 128 |
| 1683 mean_fc <- mean_fc[order(mean_fc[, 1]), ] | 1803 mean_fc <- mean_fc[order(mean_fc[, 1]), ] |
| 1804 if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv") | |
| 1684 # mean_fc columns so far: "Kinase.Gene", "log2FC" | 1805 # mean_fc columns so far: "Kinase.Gene", "log2FC" |
| 1685 # - Kinase.Gene | 1806 # - Kinase.Gene |
| 1686 # indicates the gene name for each kinase. | 1807 # indicates the gene name for each kinase. |
| 1687 | 1808 |
| 1688 # Create column 3: mS | 1809 # Create column 3: mS |
| 1734 , | 1855 , |
| 1735 drop = FALSE | 1856 drop = FALSE |
| 1736 ] | 1857 ] |
| 1737 } | 1858 } |
| 1738 | 1859 |
| 1739 #ACE nb(see_variable(nrow(mean_fc)), "\n") | |
| 1740 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value | 1860 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value |
| 1741 # - FDR | 1861 # - FDR |
| 1742 # is the p-value adjusted for multiple hypothesis testing | 1862 # is the p-value adjusted for multiple hypothesis testing |
| 1743 # using the Benjamini & Hochberg method." | 1863 # using the Benjamini & Hochberg method." |
| 1744 # (KSEA.Scores.R line # 134) | 1864 # (KSEA.Scores.R line # 134) |
| 1745 mean_fc$fdr <- | 1865 mean_fc$fdr <- |
| 1746 p.adjust(mean_fc$p_value, method = "fdr") | 1866 p.adjust(mean_fc$p_value, method = "fdr") |
| 1747 | 1867 |
| 1748 # It makes no sense to leave Z-scores negative when using | 1868 # It makes no sense to leave Z-scores negative when using |
| 1749 # absolute value of fold-change | 1869 # absolute value of fold-change |
| 1870 # nolint start | |
| 1871 # squash un-actionable "no visible global for ..." | |
| 1750 if (params$kseaUseAbsoluteLog2FC) { | 1872 if (params$kseaUseAbsoluteLog2FC) { |
| 1873 # nolint end | |
| 1751 mean_fc$z_score <- abs(mean_fc$z_score) | 1874 mean_fc$z_score <- abs(mean_fc$z_score) |
| 1752 } | 1875 } |
| 1753 | 1876 |
| 1754 # Remove second column (log2FC), which is duplicated as mS | 1877 # Remove second column (log2FC), which is duplicated as mS |
| 1755 # (KSEA.Scores.R line # 136) | 1878 # (KSEA.Scores.R line # 136) |
| 1807 } | 1930 } |
| 1808 ) | 1931 ) |
| 1809 | 1932 |
| 1810 k <- k[selector < ksea_cutoff_threshold, ] | 1933 k <- k[selector < ksea_cutoff_threshold, ] |
| 1811 nrow_k <- nrow(k) | 1934 nrow_k <- nrow(k) |
| 1812 | |
| 1813 #ACE nbe(see_variable(fdr_barplot_dataframe <- k)) | |
| 1814 | 1935 |
| 1815 if (nrow_k > 0) { | 1936 if (nrow_k > 0) { |
| 1816 max_nchar_rowname <- max(nchar(rownames(k))) | 1937 max_nchar_rowname <- max(nchar(rownames(k))) |
| 1817 my_cex_names <- 1.0 / (1 + nrow_k / 50) | 1938 my_cex_names <- 1.0 / (1 + nrow_k / 50) |
| 1818 | 1939 |
| 1976 "m_s", | 2097 "m_s", |
| 1977 "z_score", | 2098 "z_score", |
| 1978 "p_value", | 2099 "p_value", |
| 1979 "fdr" | 2100 "fdr" |
| 1980 ) | 2101 ) |
| 1981 #ACE output_order <- with(output_df, order(fdr)) | 2102 |
| 2103 # nolint start | |
| 1982 output_order <- with(output_df, order(p_value)) | 2104 output_order <- with(output_df, order(p_value)) |
| 2105 # nolint end | |
| 1983 output_df <- output_df[output_order, ] | 2106 output_df <- output_df[output_order, ] |
| 1984 | 2107 |
| 1985 output_df[, 2] <- sprintf("%0.3g", output_df[, 2]) | 2108 output_df[, 2] <- sprintf("%0.3g", output_df[, 2]) |
| 1986 output_df$fdr <- sprintf("%0.4f", output_df$fdr) | 2109 output_df$fdr <- sprintf("%0.4f", output_df$fdr) |
| 1987 output_df$p_value <- sprintf("%0.2e", output_df$p_value) | 2110 output_df$p_value <- sprintf("%0.2e", output_df$p_value) |
| 1988 output_df$z_score <- sprintf("%0.2f", output_df$z_score) | 2111 output_df$z_score <- sprintf("%0.2f", output_df$z_score) |
| 1989 output_df$m_s <- sprintf("%d", output_df$m_s) | 2112 output_df$m_s <- sprintf("%d", output_df$m_s) |
| 1990 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment) | 2113 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment) |
| 1991 output_ncol <- ncol(output_df) | |
| 1992 colnames(output_df) <- | 2114 colnames(output_df) <- |
| 1993 c( | 2115 c( |
| 1994 "Kinase", | 2116 "Kinase or motif", |
| 1995 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)", | 2117 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)", |
| 1996 "Enrichment", | 2118 "Enrichment", |
| 1997 "Substrates", | 2119 "Substrates", |
| 1998 "z-score", | 2120 "z-score", |
| 1999 "p-value", | 2121 "p-value", |
| 2020 ) | 2142 ) |
| 2021 if (sum(selector < ksea_cutoff_threshold) > 0) { | 2143 if (sum(selector < ksea_cutoff_threshold) > 0) { |
| 2022 if (print_nb_messages) nbe(see_variable(output_df)) | 2144 if (print_nb_messages) nbe(see_variable(output_df)) |
| 2023 math_caption <- gsub("{", "\\{", caption, fixed = TRUE) | 2145 math_caption <- gsub("{", "\\{", caption, fixed = TRUE) |
| 2024 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE) | 2146 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE) |
| 2025 # with ( | |
| 2026 # output_df, | |
| 2027 # ) | |
| 2028 if (TRUE) { | 2147 if (TRUE) { |
| 2029 output_df$Kinase <- whack_underscores(output_df$Kinase) | |
| 2030 data_frame_tabbing_latex( | 2148 data_frame_tabbing_latex( |
| 2031 x = output_df, | 2149 x = output_df, |
| 2032 # vector of tab stops, in inches | 2150 # vector of tab stops, in inches |
| 2033 tabstops = c(1.0, 1.2, 1.0, 1.0, 1.0, 1.0), | 2151 tabstops = c(1.8, 1.2, 0.8, 0.8, 0.8, 0.8, 0.8), |
| 2034 # vector of headings, registered with tab-stops | 2152 # vector of headings, registered with tab-stops |
| 2035 headings = colnames(output_df), | 2153 headings = colnames(output_df), |
| 2036 # digits to pass to format.data.frame | 2154 # digits to pass to format.data.frame |
| 2037 digits = NULL, | 2155 digits = NULL, |
| 2038 # maximumn number of rows to print | 2156 # maximumn number of rows to print |
| 2055 # https://tug.org/texinfohtml/latex2e.html#Font-sizes | 2173 # https://tug.org/texinfohtml/latex2e.html#Font-sizes |
| 2056 charactersize = "small", | 2174 charactersize = "small", |
| 2057 # set verbatim to TRUE to debug output | 2175 # set verbatim to TRUE to debug output |
| 2058 verbatim = FALSE | 2176 verbatim = FALSE |
| 2059 ) | 2177 ) |
| 2060 } else { | |
| 2061 data_frame_table_latex( | |
| 2062 x = output_df, | |
| 2063 justification = "l c c c c c c", | |
| 2064 centered = TRUE, | |
| 2065 caption = sprintf( | |
| 2066 "\\text{%s}, KSEA %s < %s", | |
| 2067 math_caption, | |
| 2068 ksea_cutoff_statistic, | |
| 2069 ksea_cutoff_threshold | |
| 2070 ), | |
| 2071 anchor = anchor, | |
| 2072 underscore_whack = FALSE | |
| 2073 ) | |
| 2074 } | 2178 } |
| 2075 } else { | 2179 } else { |
| 2076 cat( | 2180 cat( |
| 2077 sprintf( | 2181 sprintf( |
| 2078 "\\break | 2182 "\\break |
| 2130 mycol <- unique(append(mycol_neg, mycol_pos)) | 2234 mycol <- unique(append(mycol_neg, mycol_pos)) |
| 2131 color_breaks <- list(breaks_all, mycol) | 2235 color_breaks <- list(breaks_all, mycol) |
| 2132 return(color_breaks) | 2236 return(color_breaks) |
| 2133 } | 2237 } |
| 2134 | 2238 |
| 2239 # nolint start | |
| 2240 # squash un-actionable cyclomatic_complexity warning | |
| 2135 hm2plus <- function( | 2241 hm2plus <- function( |
| 2242 # nolint end | |
| 2136 x, | 2243 x, |
| 2137 mat = matrix( | 2244 mat = matrix( |
| 2138 c( | 2245 c( |
| 2139 c(0, 4, 0), | 2246 c(0, 4, 0), |
| 2140 c(0, 3, 3), | 2247 c(0, 3, 3), |
| 2159 key_par = list(), | 2266 key_par = list(), |
| 2160 hclustfun = hclust, | 2267 hclustfun = hclust, |
| 2161 ... | 2268 ... |
| 2162 ) { | 2269 ) { |
| 2163 | 2270 |
| 2164 varargs <- list(...) | |
| 2165 if (FALSE) # this is to avoid commenting out code to pass linting... | 2271 if (FALSE) # this is to avoid commenting out code to pass linting... |
| 2166 my_hm2 <- latex_show_invocation(heatmap.2, head_patch = FALSE) | 2272 my_hm2 <- latex_show_invocation(gplots::heatmap.2, head_patch = FALSE) |
| 2167 else | 2273 else |
| 2168 my_hm2 <- heatmap.2 | 2274 my_hm2 <- gplots::heatmap.2 |
| 2169 | 2275 |
| 2170 x <- as.matrix(x) | 2276 x <- as.matrix(x) |
| 2171 if (sum(!is.na(x)) < 1) | 2277 if (sum(!is.na(x)) < 1) |
| 2172 return(NULL) | 2278 return(NULL) |
| 2173 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough | 2279 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough |
| 2175 min_nonax <- min(x, na.rm = TRUE) | 2281 min_nonax <- min(x, na.rm = TRUE) |
| 2176 max_nonax <- max(x, na.rm = TRUE) | 2282 max_nonax <- max(x, na.rm = TRUE) |
| 2177 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n") | 2283 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n") |
| 2178 if (divergent) { | 2284 if (divergent) { |
| 2179 zlim <- max(abs(min_nonax), abs(max_nonax)) | 2285 zlim <- max(abs(min_nonax), abs(max_nonax)) |
| 2180 if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n")) | 2286 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
| 2181 breaks <- (zlim) / (break_count:1) | 2287 breaks <- (zlim) / (break_count:1) |
| 2182 if (print_nb_messages) nb(see_variable(breaks, "\n")) | 2288 if (print_nb_messages) nb(see_variable(breaks, "\n")) |
| 2183 breaks <- breaks - median(breaks) | 2289 breaks <- breaks - median(breaks) |
| 2184 zlim <- c(-zlim, zlim) | 2290 zlim <- c(-zlim, zlim) |
| 2185 if (print_nb_messages) nb(see_variable(zlim, "\n")) | 2291 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
| 2186 } else { | 2292 } else { |
| 2187 zlim <- max(abs(min_nonax), abs(max_nonax)) | 2293 zlim <- max(abs(min_nonax), abs(max_nonax)) |
| 2188 if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n")) | 2294 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
| 2189 breaks <- zlim / (break_count:1) | 2295 breaks <- zlim / (break_count:1) |
| 2190 if (print_nb_messages) nb(see_variable(breaks, "\n")) | 2296 if (print_nb_messages) nb(see_variable(breaks, "\n")) |
| 2191 if (max_nonax < 0) { | 2297 if (max_nonax < 0) { |
| 2192 breaks <- breaks - zlim | 2298 breaks <- breaks - zlim |
| 2193 zlim <- c(-zlim, 0) | 2299 zlim <- c(-zlim, 0) |
| 2195 zlim <- c(0, zlim) | 2301 zlim <- c(0, zlim) |
| 2196 } | 2302 } |
| 2197 if (print_nb_messages) nb(see_variable(zlim, "\n")) | 2303 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
| 2198 } | 2304 } |
| 2199 nonax <- x | 2305 nonax <- x |
| 2200 nonax[is.na(x)] <- min_nonax | 2306 # nolint start |
| 2307 # squash un-actionable "no visible global for ..." | |
| 2308 if (params$heatMapNAsubstitute) nonax[is.na(x)] <- min_nonax | |
| 2309 # nolint end | |
| 2201 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5) | 2310 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5) |
| 2202 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0) | 2311 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0) |
| 2203 colors <- | 2312 colors <- |
| 2204 if (divergent && min_nonax < 0) { | 2313 if (divergent && min_nonax < 0) { |
| 2205 # divergent colors on both sides of zero | 2314 # divergent colors on both sides of zero |
| 2210 } else if (divergent && max_nonax < 0) { | 2319 } else if (divergent && max_nonax < 0) { |
| 2211 # "divergent" colors < zero | 2320 # "divergent" colors < zero |
| 2212 colorRampPalette(c("red", "white"))(color_count) | 2321 colorRampPalette(c("red", "white"))(color_count) |
| 2213 } else { | 2322 } else { |
| 2214 # "non-divergent" colors including zero | 2323 # "non-divergent" colors including zero |
| 2215 hcl.colors(color_count, "YlOrRd", rev = TRUE) | 2324 tmp <- hcl.colors(color_count, "YlOrRd", rev = TRUE) |
| 2325 # nolint start | |
| 2326 # squash un-actionable "no visible global for ..." | |
| 2327 tmp[1] <- params$heatMapNAgrey | |
| 2328 # nolint end | |
| 2329 tmp | |
| 2216 } | 2330 } |
| 2217 | 2331 |
| 2218 #ACE if (print_nb_messages) nb("within hm2plus", see_variable(key_par), "\n") | 2332 nbe("There are ", sum(is.na(x)), " NA values") |
| 2219 #ACE if (print_nb_messages) nb(see_variable(colors, "\n")) | 2333 |
| 2220 #ACE key_par$col = colors | |
| 2221 #ACE key_par$breaks = breaks | |
| 2222 | |
| 2223 if (print_nb_messages) nb(see_variable(par(), "\n")) #ACE TODO remove me | |
| 2224 if (print_nb_messages) cat("\\leavevmode\n\\linebreak\n") #ACE TODO remove me | |
| 2225 suppressWarnings( | 2334 suppressWarnings( |
| 2226 my_hm2( | 2335 my_hm2( |
| 2227 x = x, | 2336 x = x, |
| 2228 col = colors, | 2337 col = colors, |
| 2229 #ACE symkey = FALSE, | |
| 2230 density.info = density_info, | 2338 density.info = density_info, |
| 2231 srtCol = srtcol, | 2339 srtCol = srtcol, |
| 2232 srtRow = srtrow, | 2340 srtRow = srtrow, |
| 2233 margins = margins, | 2341 margins = margins, |
| 2234 lwid = widths, | 2342 lwid = widths, |
| 2239 lmat = mat, | 2347 lmat = mat, |
| 2240 notecol = notecol, | 2348 notecol = notecol, |
| 2241 trace = trace, | 2349 trace = trace, |
| 2242 bg = "yellow", | 2350 bg = "yellow", |
| 2243 hclustfun = hclustfun, | 2351 hclustfun = hclustfun, |
| 2244 #ACE breaks = breaks, | |
| 2245 oldstyle = FALSE, | 2352 oldstyle = FALSE, |
| 2246 ... # varargs | 2353 ... # varargs |
| 2247 ) | 2354 ) |
| 2248 ) | 2355 ) |
| 2249 # implicitly returning value returned by heatmap.2 | 2356 # implicitly returning value returned by heatmap.2 |
| 2250 } | 2357 } |
| 2251 | 2358 |
| 2252 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap | 2359 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap |
| 2360 # nolint start | |
| 2361 # squash un-actionable cyclomatic_complexity warning | |
| 2253 draw_kseaapp_summary_heatmap <- function( | 2362 draw_kseaapp_summary_heatmap <- function( |
| 2363 # nolint end | |
| 2254 x, # matrix with row/col names already formatted | 2364 x, # matrix with row/col names already formatted |
| 2255 sample_cluster, # a binary input of TRUE or FALSE, | 2365 sample_cluster, # a binary input of TRUE or FALSE, |
| 2256 # indicating whether or not to perform | 2366 # indicating whether or not to perform |
| 2257 # hierarchical clustering of the sample columns | 2367 # hierarchical clustering of the sample columns |
| 2258 merged_asterisk, # matrix having dimensions of x, values "*" or "" | 2368 merged_asterisk, # matrix having dimensions of x, values "*" or "" |
| 2293 cat_variable(x) | 2403 cat_variable(x) |
| 2294 return(FALSE) | 2404 return(FALSE) |
| 2295 } | 2405 } |
| 2296 max_nchar_rowname <- max(nchar(rownames(x))) | 2406 max_nchar_rowname <- max(nchar(rownames(x))) |
| 2297 max_nchar_colname <- max(nchar(colnames(x))) | 2407 max_nchar_colname <- max(nchar(colnames(x))) |
| 2298 my_limit <- g_intensity_hm_rows | |
| 2299 | 2408 |
| 2300 my_row_cex_scale <- master_cex * 150 / nrow_x | 2409 my_row_cex_scale <- master_cex * 150 / nrow_x |
| 2301 #ACE row cex shrink hack begin | 2410 # row cex shrink hack begin |
| 2302 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) | 2411 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) |
| 2303 #ACE row cex shrink hack end | 2412 # row cex shrink hack end |
| 2304 | 2413 |
| 2305 my_col_cex_scale <- 3.0 | 2414 my_col_cex_scale <- 3.0 |
| 2306 #ACE col cex shrink hack begin | 2415 # col cex shrink hack begin |
| 2307 if (ncol_x > 40) | 2416 if (ncol_x > 40) |
| 2308 my_col_cex_scale <- 3.0 * 40 / ncol_x | 2417 my_col_cex_scale <- 3.0 * 40 / ncol_x |
| 2309 #ACE col cex shrink hack end | 2418 # col cex shrink hack end |
| 2310 | 2419 |
| 2311 my_asterisk_scale <- 0.4 * my_row_cex_scale | 2420 my_asterisk_scale <- 0.4 * my_row_cex_scale |
| 2312 my_row_warp <- 1 | 2421 my_row_warp <- 1 |
| 2313 my_note_warp <- 2 | 2422 my_note_warp <- 2 |
| 2314 my_row_warp <- 1 | 2423 my_row_warp <- 1 |
| 2336 ) | 2445 ) |
| 2337 my_margins <- c(1, 1) + | 2446 my_margins <- c(1, 1) + |
| 2338 c( | 2447 c( |
| 2339 margins[1] * 0.08 * max_nchar_colname * my_col_cex, | 2448 margins[1] * 0.08 * max_nchar_colname * my_col_cex, |
| 2340 margins[2] * 0.04 * max_nchar_rowname * my_row_cex | 2449 margins[2] * 0.04 * max_nchar_rowname * my_row_cex |
| 2341 ) | |
| 2342 | |
| 2343 my_notecex <- | |
| 2344 my_scale * | |
| 2345 min( | |
| 2346 1.1, | |
| 2347 my_row_cex_asterisk * my_note_warp, | |
| 2348 my_col_cex * my_note_warp | |
| 2349 ) | 2450 ) |
| 2350 | 2451 |
| 2351 if (print_trace_messages) { | 2452 if (print_trace_messages) { |
| 2352 cat_variable(my_heights, suffix = "; ") | 2453 cat_variable(my_heights, suffix = "; ") |
| 2353 cat_variable(my_margins, suffix = "\n\n") | 2454 cat_variable(my_margins, suffix = "\n\n") |
| 2401 return(TRUE) | 2502 return(TRUE) |
| 2402 } | 2503 } |
| 2403 | 2504 |
| 2404 # function drawing heatmap of contrast fold-change for each kinase, | 2505 # function drawing heatmap of contrast fold-change for each kinase, |
| 2405 # adapted from KSEAapp::KSEA.Heatmap | 2506 # adapted from KSEAapp::KSEA.Heatmap |
| 2507 # nolint start | |
| 2508 # squash un-actionable cyclomatic_complexity warning | |
| 2406 ksea_heatmap <- function( | 2509 ksea_heatmap <- function( |
| 2510 # nolint end | |
| 2407 # the data frame outputs from the KSEA.Scores() function, in list format | 2511 # the data frame outputs from the KSEA.Scores() function, in list format |
| 2408 score_list, | 2512 score_list, |
| 2409 # a character vector of all the sample names for heatmap annotation: | 2513 # a character vector of all the sample names for heatmap annotation: |
| 2410 # - the names must be in the same order as the data in score_list | 2514 # - the names must be in the same order as the data in score_list |
| 2411 # - please avoid long names, as they may get cropped in the final image | 2515 # - please avoid long names, as they may get cropped in the final image |
| 2427 # hierarchical clustering of the sample columns | 2531 # hierarchical clustering of the sample columns |
| 2428 sample_cluster, | 2532 sample_cluster, |
| 2429 # a binary input of TRUE or FALSE, indicating whether or not to export | 2533 # a binary input of TRUE or FALSE, indicating whether or not to export |
| 2430 # the heatmap as a .png image into the working directory | 2534 # the heatmap as a .png image into the working directory |
| 2431 export = FALSE, | 2535 export = FALSE, |
| 2432 # bottom and right margins; adjust as needehttps://tex.stackexchange.com/a/56795d if contrast names are too long | 2536 # bottom and right margins; adjust as needed if contrast names are too long |
| 2433 margins = c(6, 6), | 2537 margins = c(6, 6), |
| 2434 # print which kinases? | 2538 # print which kinases? |
| 2435 # - Mandatory argument, must be one of const_ksea_.*_kinases | 2539 # - Mandatory argument, must be one of const_ksea_.*_kinases |
| 2436 which_kinases, | 2540 which_kinases, |
| 2437 # additional arguments to gplots::heatmap.2, such as: | 2541 # additional arguments to gplots::heatmap.2, such as: |
| 2482 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 | 2586 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 |
| 2483 all_rows <- rownames(merged_stats) | 2587 all_rows <- rownames(merged_stats) |
| 2484 names(asterisk_rows) <- all_rows | 2588 names(asterisk_rows) <- all_rows |
| 2485 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) | 2589 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) |
| 2486 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) | 2590 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) |
| 2487 merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE] | |
| 2488 merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE] | |
| 2489 | 2591 |
| 2490 row_list <- list() | 2592 row_list <- list() |
| 2491 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows | 2593 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows |
| 2492 row_list[[const_ksea_all_kinases]] <- all_rows | 2594 row_list[[const_ksea_all_kinases]] <- all_rows |
| 2493 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows | 2595 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows |
| 2611 } | 2713 } |
| 2612 | 2714 |
| 2613 keep_var_gtr_0 <- keep_var_gtr_min(0) | 2715 keep_var_gtr_0 <- keep_var_gtr_min(0) |
| 2614 | 2716 |
| 2615 # function drawing heatmap of phosphopeptide intensities | 2717 # function drawing heatmap of phosphopeptide intensities |
| 2718 # nolint start | |
| 2719 # squash un-actionable cyclomatic_complexity warning | |
| 2616 ppep_heatmap <- | 2720 ppep_heatmap <- |
| 2721 # nolint end | |
| 2617 function( | 2722 function( |
| 2618 m, # matrix with rownames already formatted | 2723 m, # matrix with rownames already formatted |
| 2619 cutoff, # cutoff used by hm_heading_function | 2724 cutoff, # cutoff used by hm_heading_function |
| 2620 hm_heading_function, # construct $ cat heading from m and cutoff | 2725 hm_heading_function, # construct $ cat heading from m and cutoff |
| 2621 hm_main_title, # main title for plot (drawn below heading) | 2726 hm_main_title, # main title for plot (drawn below heading) |
| 2628 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right | 2733 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right |
| 2629 row_scaling = # should row-scaling be applied if possible? | 2734 row_scaling = # should row-scaling be applied if possible? |
| 2630 g_default_heatmap_row_scaling, | 2735 g_default_heatmap_row_scaling, |
| 2631 ... # passthru to hm2plus or heatmap.2 | 2736 ... # passthru to hm2plus or heatmap.2 |
| 2632 ) { | 2737 ) { |
| 2633 use_heatmap_1 <- FALSE | |
| 2634 peptide_count <- 0 | 2738 peptide_count <- 0 |
| 2635 # emit the heading for the heatmap | 2739 # emit the heading for the heatmap |
| 2636 if (hm_heading_function(m, cutoff)) { | 2740 if (hm_heading_function(m, cutoff)) { |
| 2637 nrow_m <- nrow(m) | 2741 nrow_m <- nrow(m) |
| 2638 peptide_count <- min(max_peptide_count, nrow_m) | 2742 peptide_count <- min(max_peptide_count, nrow_m) |
| 2641 # Margin was heuristically derived to accommodate the widest label | 2745 # Margin was heuristically derived to accommodate the widest label |
| 2642 row_mchar_max <- max(nchar(rownames(m_margin))) | 2746 row_mchar_max <- max(nchar(rownames(m_margin))) |
| 2643 col_mchar_max <- max(nchar(colnames(m_margin))) | 2747 col_mchar_max <- max(nchar(colnames(m_margin))) |
| 2644 row_margin <- master_cex * row_mchar_max * 2.6 | 2748 row_margin <- master_cex * row_mchar_max * 2.6 |
| 2645 col_margin <- master_cex * col_mchar_max * 2.6 | 2749 col_margin <- master_cex * col_mchar_max * 2.6 |
| 2646 if (print_trace_messages) cat(sprintf("row_margin = %0.3f; ", row_margin)) | 2750 if (print_trace_messages) cat( |
| 2647 if (print_trace_messages) cat(sprintf("col_margin = %0.3f; ", col_margin)) | 2751 sprintf("row_margin = %0.3f; ", row_margin)) |
| 2752 if (print_trace_messages) cat( | |
| 2753 sprintf("col_margin = %0.3f; ", col_margin)) | |
| 2648 hm_call <- NULL | 2754 hm_call <- NULL |
| 2649 tryCatch( | 2755 tryCatch( |
| 2650 { | 2756 { |
| 2651 # set non-argument parameters for hm_call inner function | 2757 # set non-argument parameters for hm_call inner function |
| 2652 my_row_cex <- | 2758 my_row_cex <- |
| 2654 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows | 2760 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows |
| 2655 ) | 2761 ) |
| 2656 m_hm <- m[peptide_count:1, , drop = FALSE] | 2762 m_hm <- m[peptide_count:1, , drop = FALSE] |
| 2657 if (is.null(cellnote)) { | 2763 if (is.null(cellnote)) { |
| 2658 cellnote <- matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm)) | 2764 cellnote <- matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm)) |
| 2659 cellnote[is.na(m_hm)] <- "NA" | 2765 # nolint start |
| 2766 # squash un-actionable "no visible global for ..." | |
| 2767 cellnote[is.na(m_hm)] <- params$heatMapNAcellNote | |
| 2768 # nolint end | |
| 2660 } else { | 2769 } else { |
| 2661 cellnote <- cellnote[peptide_count:1, , drop = FALSE] | 2770 cellnote <- cellnote[peptide_count:1, , drop = FALSE] |
| 2662 } | 2771 } |
| 2663 m_hm[is.na(m_hm)] <- 0 | 2772 # nolint start |
| 2773 # squash un-actionable "no visible global for ..." | |
| 2774 if (params$heatMapNAsubstitute) m_hm[is.na(m_hm)] <- 0 | |
| 2775 # nolint end | |
| 2664 nrow_m_hm <- nrow(m_hm) | 2776 nrow_m_hm <- nrow(m_hm) |
| 2665 ncol_m_hm <- ncol(m_hm) | 2777 ncol_m_hm <- ncol(m_hm) |
| 2666 if (nrow_m_hm < 1 || ncol_m_hm < 1) | 2778 if (nrow_m_hm < 1 || ncol_m_hm < 1) |
| 2667 return(peptide_count) # return zero as initialized above | 2779 return(peptide_count) # return zero as initialized above |
| 2668 my_limit <- g_intensity_hm_rows | |
| 2669 | 2780 |
| 2670 | 2781 |
| 2671 my_row_cex <- master_cex * (100 / (2 + row_mchar_max)) | 2782 my_row_cex <- master_cex * (100 / (2 + row_mchar_max)) |
| 2672 my_col_cex <- master_cex * 6 * row_margin / col_margin | 2783 my_col_cex <- master_cex * 6 * row_margin / col_margin |
| 2673 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex | 2784 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex |
| 2674 my_col_cex <- min(my_col_cex, my_row_cex) | 2785 my_col_cex <- min(my_col_cex, my_row_cex) |
| 2675 col_margin <- sqrt(my_col_adj) * col_margin | 2786 col_margin <- sqrt(my_col_adj) * col_margin |
| 2676 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) | 2787 if (print_trace_messages) cat( |
| 2677 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) | 2788 sprintf("my_row_cex = %0.3f; ", my_row_cex)) |
| 2789 if (print_trace_messages) cat( | |
| 2790 sprintf("my_col_cex = %0.3f; ", my_col_cex)) | |
| 2678 if (is.null(margins)) my_margins <- | 2791 if (is.null(margins)) my_margins <- |
| 2679 c( | 2792 c( |
| 2680 (my_col_cex + col_margin), # col | 2793 (my_col_cex + col_margin), # col |
| 2681 (my_row_cex + row_margin) / my_row_cex # row | 2794 (my_row_cex + row_margin) / my_row_cex # row |
| 2682 ) | 2795 ) |
| 2688 "my_margins = c(%s)\n\n", | 2801 "my_margins = c(%s)\n\n", |
| 2689 paste(my_margins, collapse = ", ") | 2802 paste(my_margins, collapse = ", ") |
| 2690 ) | 2803 ) |
| 2691 ) | 2804 ) |
| 2692 my_hm2_cex <- 2 * master_cex | 2805 my_hm2_cex <- 2 * master_cex |
| 2693 my_key_cex <- 0.9 - 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows | 2806 my_key_cex <- |
| 2807 0.9 - | |
| 2808 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows | |
| 2694 my_key_warp <- 1.5 * 22.75 / row_margin | 2809 my_key_warp <- 1.5 * 22.75 / row_margin |
| 2695 my_key_cex <- min(1.10, my_key_warp * my_key_cex) | 2810 my_key_cex <- min(1.10, my_key_warp * my_key_cex) |
| 2696 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1) | 2811 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1) |
| 2697 my_hgt_scale <- 3.75 # 3.615 | 2812 my_hgt_scale <- 3.75 # 3.615 |
| 2698 my_hgt_scale <- 3.60 # 3.615 | 2813 my_hgt_scale <- 3.60 # 3.615 |
| 2703 cat_variable(my_warp, "\n\n", 3) | 2818 cat_variable(my_warp, "\n\n", 3) |
| 2704 # added 0.9 heuristically... | 2819 # added 0.9 heuristically... |
| 2705 my_plot_height <- | 2820 my_plot_height <- |
| 2706 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) * | 2821 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) * |
| 2707 min(my_hgt_scale, my_hgt_scale * my_warp) | 2822 min(my_hgt_scale, my_hgt_scale * my_warp) |
| 2708 my_plot_height <- min(3.65, my_plot_height * g_intensity_hm_rows / 50) | 2823 my_plot_height <- |
| 2824 min(3.65, my_plot_height * g_intensity_hm_rows / 50) | |
| 2709 my_heights <- c( | 2825 my_heights <- c( |
| 2710 0.3, # title and top dendrogram | 2826 0.3, # title and top dendrogram |
| 2711 my_plot_height, # plot and bottom margin | 2827 my_plot_height, # plot and bottom margin |
| 2712 4.15 - my_hgt_scale, # legend | 2828 4.15 - my_hgt_scale, # legend |
| 2713 0.05 + my_hgt_scale - my_plot_height # whitespace below legend | 2829 0.05 + my_hgt_scale - my_plot_height # whitespace below legend |
| 2768 scale = scaling, | 2884 scale = scaling, |
| 2769 srtcol = 90, | 2885 srtcol = 90, |
| 2770 srtrow = 0, | 2886 srtrow = 0, |
| 2771 xlab = "", | 2887 xlab = "", |
| 2772 cellnote = cellnote, | 2888 cellnote = cellnote, |
| 2889 # ref for na.color: | |
| 2890 # https://cran.r-project.org/web/packages/gplots/gplots.pdf | |
| 2891 # nolint start | |
| 2892 # squash un-actionable "no visible global for ..." | |
| 2893 na.color = params$heatMapNAcellColor, | |
| 2894 # nolint end | |
| 2773 notecex = my_note_cex, | 2895 notecex = my_note_cex, |
| 2774 ... | 2896 ... |
| 2775 ) | 2897 ) |
| 2776 ) | 2898 ) |
| 2777 ) { | 2899 ) { |
| 2831 }, | 2953 }, |
| 2832 error = function(r) { | 2954 error = function(r) { |
| 2833 cat( | 2955 cat( |
| 2834 sprintf( | 2956 sprintf( |
| 2835 "\n%s %s Internal message: %s\n\\newline\n\n", | 2957 "\n%s %s Internal message: %s\n\\newline\n\n", |
| 2836 "Failure drawing heatmap,", | 2958 "Failure drawing heatmap, possibly because of ", |
| 2837 "possibly because of too many missing values.\n\\newline\n\n", | 2959 "too many missing values.\n\\newline\n\n", |
| 2838 r$message | 2960 r$message |
| 2839 ) | 2961 ) |
| 2840 ) | 2962 ) |
| 2841 cat_margins() | 2963 cat_margins() |
| 2842 } | 2964 } |
| 2843 ) | 2965 ) |
| 2844 } else { | 2966 } else { |
| 2845 cat( | 2967 cat( |
| 2846 "\nFailure drawing heatmap, possibly because of too many missing values.\n" | 2968 "\nFailure drawing heatmap, ", |
| 2969 "possibly because of too many missing values.\n" | |
| 2847 ) | 2970 ) |
| 2848 } | 2971 } |
| 2849 } | 2972 } |
| 2850 ) | 2973 ) |
| 2851 } | 2974 } |
| 2853 # return value: | 2976 # return value: |
| 2854 peptide_count | 2977 peptide_count |
| 2855 } | 2978 } |
| 2856 | 2979 |
| 2857 # function drawing heatmap of correlations if they exist, else covariances | 2980 # function drawing heatmap of correlations if they exist, else covariances |
| 2981 # nolint start | |
| 2982 # squash un-actionable cyclomatic_complexity warning | |
| 2983 # squash un-actionable "no visible global for ..." | |
| 2858 cov_heatmap <- | 2984 cov_heatmap <- |
| 2859 function( | 2985 function( |
| 2986 # nolint end | |
| 2860 m, # matrix with rownames already formatted | 2987 m, # matrix with rownames already formatted |
| 2988 kinase_name, | |
| 2861 top_substrates = FALSE, | 2989 top_substrates = FALSE, |
| 2862 ... # passthru to hm2plus or heatmap.2 | 2990 ... # passthru to hm2plus or heatmap.2 |
| 2863 ) { | 2991 ) { |
| 2864 if (print_nb_messages) nbe(see_variable(m), " [", nrow(m), "x", ncol(m), "\n") | 2992 if (print_nb_messages) nbe( |
| 2865 #ACE nb(rowSums(m, na.rm = TRUE)) | 2993 see_variable(m), " [", nrow(m), "x", ncol(m), "\n") |
| 2866 #ACE bad_rows <- (rowSums(m, na.rm = TRUE) == 0) | |
| 2867 #ACE nb(see_variable(bad_rows)) | |
| 2868 #ACE m <- m[-bad_rows, , drop = FALSE] | |
| 2869 colnames_m <- colnames(m) | 2994 colnames_m <- colnames(m) |
| 2870 is_na_m <- is.na(m) | 2995 is_na_m <- is.na(m) |
| 2871 tmp <- m | 2996 tmp <- m |
| 2872 tmp[is_na_m] <- 0 | 2997 tmp[is_na_m] <- 0 |
| 2873 | 2998 |
| 2901 } | 3026 } |
| 2902 | 3027 |
| 2903 | 3028 |
| 2904 t_m <- t(tmp) | 3029 t_m <- t(tmp) |
| 2905 t_m[is.na(t_m)] <- 0 | 3030 t_m[is.na(t_m)] <- 0 |
| 2906 prefiltered_nrow <- ncol(t_m) | |
| 2907 | 3031 |
| 2908 my_corcov <- cov(t_m) | 3032 my_corcov <- cov(t_m) |
| 2909 did_filter_rows <- did_filter_cols <- FALSE | 3033 did_filter_rows <- did_filter_cols <- FALSE |
| 2910 if (g_correlate_substrates && !is_positive_definite(my_corcov)) { | 3034 if (g_correlate_substrates && !is_positive_definite(my_corcov)) { |
| 2911 my_correlate_substrates <- FALSE | 3035 my_correlate_substrates <- FALSE |
| 2942 | 3066 |
| 2943 f_omissions <- | 3067 f_omissions <- |
| 2944 function(is_corr) { | 3068 function(is_corr) { |
| 2945 cat( | 3069 cat( |
| 2946 sprintf( | 3070 sprintf( |
| 2947 "Below is the %s plot for %s substrates", | 3071 "Below is the %s plot for %s sites", |
| 2948 if (is_corr) "correlation" else "covariance", | 3072 if (is_corr) "correlation" else "covariance", |
| 2949 sprintf( | 3073 sprintf( |
| 2950 if (top_substrates) | 3074 if (top_substrates) |
| 2951 "%0.0f \"highest-quality\"" | 3075 "%0.0f \"highest-quality\"" |
| 2952 else | 3076 else |
| 3004 | 3128 |
| 3005 parjust <- par(adj = 0.5) | 3129 parjust <- par(adj = 0.5) |
| 3006 on.exit(par(parjust)) | 3130 on.exit(par(parjust)) |
| 3007 my_corcov <- my_corcov[order(rownames(my_corcov)), ] | 3131 my_corcov <- my_corcov[order(rownames(my_corcov)), ] |
| 3008 my_main <- | 3132 my_main <- |
| 3009 sprintf("%s among %s substrates %s", | 3133 sprintf("%s among %s sites %s", |
| 3010 if (my_correlate_substrates) "Correlation" | 3134 if (my_correlate_substrates) "Correlation" |
| 3011 else "Covariance", | 3135 else "Covariance", |
| 3012 kinase_name, | 3136 kinase_name, |
| 3013 if (!my_correlate_substrates && | 3137 if (!my_correlate_substrates && |
| 3014 g_filter_cov_var_gt_1 && | 3138 g_filter_cov_var_gt_1 && |
| 3023 my_plot_height, | 3147 my_plot_height, |
| 3024 my_legend_height # was 4.0 - my_hgt_scale # was 4.19 | 3148 my_legend_height # was 4.0 - my_hgt_scale # was 4.19 |
| 3025 ) | 3149 ) |
| 3026 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar)) | 3150 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar)) |
| 3027 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin)) | 3151 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin)) |
| 3028 if (print_trace_messages) cat(sprintf("my_plot_height = %0.3f\n\n", my_plot_height)) | 3152 if (print_trace_messages) cat( |
| 3153 sprintf("my_plot_height = %0.3f\n\n", my_plot_height)) | |
| 3029 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex)) | 3154 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex)) |
| 3030 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) | 3155 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) |
| 3031 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) | 3156 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) |
| 3032 if (print_trace_messages) cat(sprintf("my_key_cex = %0.3f\n\n", my_key_cex)) | 3157 if (print_trace_messages) cat( |
| 3033 if (print_trace_messages) cat(sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale)) | 3158 sprintf("my_key_cex = %0.3f\n\n", my_key_cex)) |
| 3034 if (print_trace_messages) cat(sprintf("legend height = %0.3f\n\n", my_legend_height)) | 3159 if (print_trace_messages) cat( |
| 3160 sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale)) | |
| 3161 if (print_trace_messages) cat( | |
| 3162 sprintf("legend height = %0.3f\n\n", my_legend_height)) | |
| 3035 if (print_trace_messages) cat( | 3163 if (print_trace_messages) cat( |
| 3036 sprintf( | 3164 sprintf( |
| 3037 "my_heights = c(%s); sum = %0.3f\n\n", | 3165 "my_heights = c(%s); sum = %0.3f\n\n", |
| 3038 paste( | 3166 paste( |
| 3039 sprintf("%0.3f", my_heights), | 3167 sprintf("%0.3f", my_heights), |
| 3671 "median peptide-intensity across all sample classes.\n" | 3799 "median peptide-intensity across all sample classes.\n" |
| 3672 ) | 3800 ) |
| 3673 # Take the accurate ln(x+1) because the data are log-normally distributed | 3801 # Take the accurate ln(x+1) because the data are log-normally distributed |
| 3674 # and because median can involve an average of two measurements. | 3802 # and because median can involve an average of two measurements. |
| 3675 quant_data_imp <- log1p(quant_data_imp) | 3803 quant_data_imp <- log1p(quant_data_imp) |
| 3676 quant_data_imp[ind] <- row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]] | 3804 quant_data_imp[ind] <- |
| 3805 row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]] | |
| 3677 # Take the accurate e^x - 1 to match scaling of original input. | 3806 # Take the accurate e^x - 1 to match scaling of original input. |
| 3678 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) | 3807 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) |
| 3679 good_rows <- !is.nan(rowMeans(quant_data_imp)) | 3808 good_rows <- !is.nan(rowMeans(quant_data_imp)) |
| 3680 } | 3809 } |
| 3681 , "mean" = { | 3810 , "mean" = { |
| 3687 # Take the accurate ln(x+1) because the data are log-normally distributed, | 3816 # Take the accurate ln(x+1) because the data are log-normally distributed, |
| 3688 # so arguments to mean should be previously transformed. | 3817 # so arguments to mean should be previously transformed. |
| 3689 # this will have to be | 3818 # this will have to be |
| 3690 quant_data_imp <- log1p(quant_data_imp) | 3819 quant_data_imp <- log1p(quant_data_imp) |
| 3691 # Assign to NA cells the mean for the row | 3820 # Assign to NA cells the mean for the row |
| 3692 quant_data_imp[ind] <- row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]] | 3821 quant_data_imp[ind] <- |
| 3822 row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]] | |
| 3693 # Take the accurate e^x - 1 to match scaling of original input. | 3823 # Take the accurate e^x - 1 to match scaling of original input. |
| 3694 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) | 3824 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) |
| 3695 good_rows <- !is.nan(rowMeans(quant_data_imp)) | 3825 good_rows <- !is.nan(rowMeans(quant_data_imp)) |
| 3696 } | 3826 } |
| 3697 , "random" = { | 3827 , "random" = { |
| 3698 quant_data_imp <- quant_data | 3828 quant_data_imp <- quant_data |
| 3699 m1 <- median(sds, na.rm = TRUE) * sd_percentile #sd to be used is the median sd | 3829 # sd to be used is the median sd |
| 3830 m1 <- median(sds, na.rm = TRUE) * sd_percentile | |
| 3700 # If you want results to be reproducible, you will want to call | 3831 # If you want results to be reproducible, you will want to call |
| 3701 # base::set.seed before calling stats::rnorm | 3832 # base::set.seed before calling stats::rnorm |
| 3702 imputation_method_description <- | 3833 imputation_method_description <- |
| 3703 paste("Substitute each missing value with random intensity", | 3834 paste("Substitute each missing value with random intensity", |
| 3704 sprintf( | 3835 sprintf( |
| 3733 imp_smry_pot_peptides_after <- sum(good_rows) | 3864 imp_smry_pot_peptides_after <- sum(good_rows) |
| 3734 imp_smry_rejected_after <- sum(!good_rows) | 3865 imp_smry_rejected_after <- sum(!good_rows) |
| 3735 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) | 3866 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) |
| 3736 | 3867 |
| 3737 # From ?`%in%`: | 3868 # From ?`%in%`: |
| 3738 # %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 | 3869 # %in% is currently defined |
| 3870 # as function(x, table) match(x, table, nomatch = 0) > 0 | |
| 3739 | 3871 |
| 3740 stock_in <- | 3872 stock_in <- |
| 3741 names(good_rows) %in% | 3873 names(good_rows) %in% |
| 3742 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) | 3874 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) |
| 3743 if (print_nb_messages) nbe(see_variable(stock_in), "\n") | 3875 if (print_nb_messages) nbe(see_variable(stock_in), "\n") |
| 3796 cat(tabular_lines) | 3928 cat(tabular_lines) |
| 3797 ``` | 3929 ``` |
| 3798 | 3930 |
| 3799 ```{r filter_good_rows, echo = FALSE} | 3931 ```{r filter_good_rows, echo = FALSE} |
| 3800 | 3932 |
| 3801 if (print_nb_messages) nbe("before name extraction, ", see_variable(length(good_rows)), " ", see_variable(good_rows), "\n") | 3933 if (print_nb_messages) nbe( |
| 3934 "before name extraction, ", see_variable(length(good_rows)), " ", | |
| 3935 see_variable(good_rows), "\n") | |
| 3802 good_rows <- names(good_rows[names(great_enough_row_names)]) | 3936 good_rows <- names(good_rows[names(great_enough_row_names)]) |
| 3803 if (print_nb_messages) nbe("after name extraction, ", see_variable(length(good_rows)), see_variable(good_rows), "\n") | 3937 if (print_nb_messages) nbe( |
| 3804 | 3938 "after name extraction, ", see_variable(length(good_rows)), |
| 3805 #ACE min_group_obs_count <- min_group_obs_count[names(great_enough_row_names)] | 3939 see_variable(good_rows), "\n") |
| 3806 #ACE nbe("good_rows") | 3940 |
| 3807 #ACE nbe(see_variable(good_rows)) | |
| 3808 #ACE nbe("names(min_group_obs_count) before filter for good rows") | |
| 3809 #ACE nbe(see_variable(names(min_group_obs_count))) | |
| 3810 min_group_obs_count <- min_group_obs_count[good_rows] | 3941 min_group_obs_count <- min_group_obs_count[good_rows] |
| 3811 #ACE nbe("min_group_obs_count after filter for good rows") | |
| 3812 #ACE nbe(see_variable(names(min_group_obs_count))) | |
| 3813 | 3942 |
| 3814 # Zap rows where imputation was insufficiently effective | 3943 # Zap rows where imputation was insufficiently effective |
| 3815 full_data <- full_data [good_rows, ] | 3944 full_data <- full_data [good_rows, ] |
| 3816 quant_data <- quant_data [good_rows, ] | 3945 quant_data <- quant_data [good_rows, ] |
| 3817 quant_data_log <- quant_data_log [good_rows, ] | 3946 quant_data_log <- quant_data_log [good_rows, ] |
| 3818 | 3947 |
| 3819 if (print_nb_messages) nbe("before row filter, ", see_variable(nrow(quant_data_imp)), "\n") | 3948 if (print_nb_messages) nbe( |
| 3949 "before row filter, ", see_variable(nrow(quant_data_imp)), "\n") | |
| 3820 quant_data_imp <- quant_data_imp[good_rows, ] | 3950 quant_data_imp <- quant_data_imp[good_rows, ] |
| 3821 if (print_nb_messages) nbe("after row filter, ", see_variable(nrow(quant_data_imp)), "\n") | 3951 if (print_nb_messages) nbe( |
| 3952 "after row filter, ", see_variable(nrow(quant_data_imp)), "\n") | |
| 3822 write_debug_file(quant_data_imp) | 3953 write_debug_file(quant_data_imp) |
| 3823 quant_data_imp_good_rows <- quant_data_imp | 3954 quant_data_imp_good_rows <- quant_data_imp |
| 3824 | 3955 |
| 3825 write_debug_file(quant_data_imp_good_rows) | 3956 write_debug_file(quant_data_imp_good_rows) |
| 3826 ``` | 3957 ``` |
| 3945 my_ppep_distrib_bxp( | 4076 my_ppep_distrib_bxp( |
| 3946 x = blue_dots | 4077 x = blue_dots |
| 3947 , sample_name_grow = sample_name_grow | 4078 , sample_name_grow = sample_name_grow |
| 3948 , main = "Peptide intensities after eliminating unusable peptides" | 4079 , main = "Peptide intensities after eliminating unusable peptides" |
| 3949 , varwidth = boxplot_varwidth | 4080 , varwidth = boxplot_varwidth |
| 3950 , sub = paste(boxplot_sub, "Box widths reflect number of peptides for sample") | 4081 , sub = paste(boxplot_sub, |
| 4082 "Box widths reflect number of peptides for sample") | |
| 3951 , xlab = "Sample" | 4083 , xlab = "Sample" |
| 3952 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | 4084 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
| 3953 , col = const_boxplot_fill | 4085 , col = const_boxplot_fill |
| 3954 , notch = FALSE | 4086 , notch = FALSE |
| 3955 , ylim = ylim | 4087 , ylim = ylim |
| 4281 | 4413 |
| 4282 ## Assignment of $p$-value and quality score | 4414 ## Assignment of $p$-value and quality score |
| 4283 | 4415 |
| 4284 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups. | 4416 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups. |
| 4285 | 4417 |
| 4286 However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance. For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https:/doi.org/10.1111/j.2517-6161.1995.tb02031.x). | 4418 However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance. For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https://doi.org/10.1111/j.2517-6161.1995.tb02031.x). |
| 4287 | 4419 |
| 4288 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as: | 4420 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as: |
| 4289 | 4421 |
| 4290 $$ | 4422 $$ |
| 4291 \textit{quality}_j | 4423 \textit{quality}_j |
| 4311 ``` | 4443 ``` |
| 4312 | 4444 |
| 4313 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'} | 4445 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'} |
| 4314 g_can_run_ksea <- FALSE | 4446 g_can_run_ksea <- FALSE |
| 4315 old_oma <- par("oma") | 4447 old_oma <- par("oma") |
| 4448 | |
| 4449 # nolint start | |
| 4450 # squash un-actionable cyclomatic_complexity warning | |
| 4316 if (count_of_treatment_levels < 2) { | 4451 if (count_of_treatment_levels < 2) { |
| 4452 # nolint end | |
| 4317 cat( | 4453 cat( |
| 4318 "ERROR!!!! Cannot perform ANOVA analysis", | 4454 "ERROR!!!! Cannot perform ANOVA analysis", |
| 4319 "(see next page)\\newpage\n" | 4455 "(see next page)\\newpage\n" |
| 4320 ) | 4456 ) |
| 4321 cat( | 4457 cat( |
| 4365 } else { | 4501 } else { |
| 4366 | 4502 |
| 4367 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n") | 4503 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n") |
| 4368 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n") | 4504 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n") |
| 4369 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n") | 4505 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n") |
| 4370 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log[, ".NE.7C"]), "\n") | |
| 4371 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n") | 4506 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n") |
| 4372 if (print_nb_messages) nbe(see_variable(anova_func), "\n") | 4507 if (print_nb_messages) nbe(see_variable(anova_func), "\n") |
| 4373 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n") | 4508 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n") |
| 4374 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n") | 4509 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n") |
| 4375 tryCatch( | 4510 tryCatch( |
| 4475 if (number_of_peptides_found > g_intensity_hm_rows - 1) { | 4610 if (number_of_peptides_found > g_intensity_hm_rows - 1) { |
| 4476 cat("\\leavevmode\n\n\n") | 4611 cat("\\leavevmode\n\n\n") |
| 4477 break | 4612 break |
| 4478 } | 4613 } |
| 4479 | 4614 |
| 4615 if (print_trace_messages) { | |
| 4616 cat_variable(cutoff) | |
| 4617 cat("\n\n") | |
| 4618 } | |
| 4619 | |
| 4480 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff) | 4620 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff) |
| 4481 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class) | 4621 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class) |
| 4482 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0) | 4622 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0) |
| 4483 bool_4 <- (p_value_data$quality >= params$minQuality) | 4623 bool_4 <- (p_value_data$quality >= params$minQuality) |
| 4484 bool_3 <- as.logical( | 4624 bool_3 <- as.logical( |
| 4507 | 4647 |
| 4508 if (print_trace_messages) | 4648 if (print_trace_messages) |
| 4509 cat_variable(filtered_p, force_str = TRUE) | 4649 cat_variable(filtered_p, force_str = TRUE) |
| 4510 | 4650 |
| 4511 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0 | 4651 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0 |
| 4652 | |
| 4512 filter_result_string <- | 4653 filter_result_string <- |
| 4513 sprintf( | 4654 sprintf( |
| 4514 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n", | 4655 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n", |
| 4515 "After filtering for ANOVA results", | 4656 "After filtering for ANOVA results", |
| 4516 if (have_remaining_peptides) | 4657 if (have_remaining_peptides) |
| 4517 as.character(sum(bool_3, na.rm = TRUE)) | 4658 as.character(sum(bool_3, na.rm = TRUE)) |
| 4518 else | 4659 else |
| 4519 "none", | 4660 "none", |
| 4520 length(bool_3), | 4661 length(bool_3), |
| 4521 sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))), | 4662 adj_p_string <- |
| 4663 sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))), | |
| 4522 sprintf( | 4664 sprintf( |
| 4523 "more than %0.0f observations in some groups", | 4665 "more than %0.0f observations in some groups", |
| 4524 max(0, g_intensity_min_per_class - 1) | 4666 max(0, g_intensity_min_per_class - 1) |
| 4525 ) | 4667 ) |
| 4526 ) | 4668 ) |
| 4535 filtered_data_filtered[ | 4677 filtered_data_filtered[ |
| 4536 order(filtered_p$fdr_adjusted_anova_p), | 4678 order(filtered_p$fdr_adjusted_anova_p), |
| 4537 , drop = FALSE | 4679 , drop = FALSE |
| 4538 ] | 4680 ] |
| 4539 | 4681 |
| 4540 if (have_remaining_peptides && nrow(filtered_p) > 0 && nrow(filtered_data_filtered) > 0) { | 4682 if (have_remaining_peptides |
| 4683 && nrow(filtered_p) > 0 | |
| 4684 && nrow(filtered_data_filtered) > 0 | |
| 4685 ) { | |
| 4541 if (first_page_suppress == 1) { | 4686 if (first_page_suppress == 1) { |
| 4542 first_page_suppress <- 0 | 4687 first_page_suppress <- 0 |
| 4543 } else { | 4688 } else { |
| 4544 cat("\\newpage\n") | 4689 cat("\\newpage\n") |
| 4545 } | 4690 } |
| 4546 latex_samepage({ | 4691 latex_samepage({ |
| 4547 cat(filter_result_string) | 4692 cat(filter_result_string) |
| 4548 if (nrow(filtered_data_filtered) > 1) { | 4693 if (nrow(filtered_data_filtered) > 1) { |
| 4549 cat(subsection_header(sprintf( | 4694 cat(subsection_header(sprintf( |
| 4550 "Intensity distributions for %d phosphopeptides\n", | 4695 "Intensity distributions for %d phosphopeptides, %s\n", |
| 4551 nrow(filtered_data_filtered) | 4696 nrow(filtered_data_filtered), |
| 4697 adj_p_string | |
| 4552 ))) | 4698 ))) |
| 4553 } else { | 4699 } else { |
| 4554 cat(subsection_header(sprintf( | 4700 cat( |
| 4555 "Intensity distribution for one phosphopeptide (%s)\n", | 4701 subsection_header( |
| 4556 rownames(filtered_data_filtered)[1] | 4702 sprintf( |
| 4557 ))) | 4703 "Intensity distribution for one phosphopeptide, %s\n", |
| 4704 adj_p_string | |
| 4705 ) | |
| 4706 ), | |
| 4707 rownames(filtered_data_filtered)[1], | |
| 4708 "\n" | |
| 4709 ) | |
| 4558 } | 4710 } |
| 4559 }) # end latex_samepage | 4711 }) # end latex_samepage |
| 4560 | 4712 |
| 4561 old_par <- par( | 4713 old_par <- par( |
| 4562 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), | 4714 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), |
| 4635 g_intensity_hm_rows), | 4787 g_intensity_hm_rows), |
| 4636 sprintf("whose adjusted p-value < %0.2f\n", cutoff) | 4788 sprintf("whose adjusted p-value < %0.2f\n", cutoff) |
| 4637 ) | 4789 ) |
| 4638 )) | 4790 )) |
| 4639 } else { | 4791 } else { |
| 4640 if (nrow(m) == 0) { | 4792 if (nrow(m) < 2) { |
| 4641 return(FALSE) | 4793 return(FALSE) |
| 4642 } else { | 4794 } else { |
| 4643 cat(subsection_header( | 4795 cat(subsection_header( |
| 4644 paste( | 4796 paste( |
| 4645 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)), | 4797 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)), |
| 4659 nrow_m <- nrow(m) | 4811 nrow_m <- nrow(m) |
| 4660 ncol_m <- ncol(m) | 4812 ncol_m <- ncol(m) |
| 4661 if (nrow_m > 0) { | 4813 if (nrow_m > 0) { |
| 4662 rownames_m <- rownames(m) | 4814 rownames_m <- rownames(m) |
| 4663 q <- data.frame(pepname = rownames_m) | 4815 q <- data.frame(pepname = rownames_m) |
| 4664 g <- sqldf(" | 4816 g <- sqldf::sqldf(" |
| 4665 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name | 4817 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name |
| 4666 FROM q, metadata_plus_p AS met | 4818 FROM q, metadata_plus_p AS met |
| 4667 WHERE q.pepname = met.Phosphopeptide | 4819 WHERE q.pepname = met.Phosphopeptide |
| 4668 ORDER BY q.rowid | 4820 ORDER BY q.rowid |
| 4669 ") | 4821 ") |
| 4716 } | 4868 } |
| 4717 } | 4869 } |
| 4718 } | 4870 } |
| 4719 } | 4871 } |
| 4720 } | 4872 } |
| 4721 cat(filter_result_string) | 4873 |
| 4722 cat("\\leavevmode\n") | 4874 cat("\\leavevmode\n") |
| 4723 | 4875 |
| 4724 if (!g_can_run_ksea) { | 4876 if (!g_can_run_ksea) { |
| 4725 errmsg <- paste("Cannot proceed with KSEA analysis", | 4877 errmsg <- paste("Cannot proceed with KSEA analysis", |
| 4726 "because too many values are missing.") | 4878 "because too many values are missing.") |
| 4745 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p) | 4897 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p) |
| 4746 display_p_value_data$quality <- | 4898 display_p_value_data$quality <- |
| 4747 sprintf("%0.3g", display_p_value_data$quality) | 4899 sprintf("%0.3g", display_p_value_data$quality) |
| 4748 | 4900 |
| 4749 headers_1st_line <- | 4901 headers_1st_line <- |
| 4750 c("", "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", "", "") | 4902 c("", "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", |
| 4903 "", "") | |
| 4751 headers_2nd_line <- | 4904 headers_2nd_line <- |
| 4752 c("Phosphopeptide", "p-value", "p-value", "values", "group-obs", "Quality", "Ranking") | 4905 c("Phosphopeptide", "p-value", "p-value", "values", "group-obs", |
| 4906 "Quality", "Ranking") | |
| 4753 data_frame_tabbing_latex( | 4907 data_frame_tabbing_latex( |
| 4754 x = display_p_value_data, | 4908 x = display_p_value_data, |
| 4755 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60), | 4909 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60), |
| 4756 use_subsubsection_header = FALSE, | 4910 use_subsubsection_header = FALSE, |
| 4757 headings = c(headers_1st_line, headers_2nd_line), | 4911 headings = c(headers_1st_line, headers_2nd_line), |
| 5333 | 5487 |
| 5334 # KSEA Analysis Summaries | 5488 # KSEA Analysis Summaries |
| 5335 | 5489 |
| 5336 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched. Enrichments are found by the CRAN `KSEAapp` package: | 5490 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched. Enrichments are found by the CRAN `KSEAapp` package: |
| 5337 | 5491 |
| 5338 - The package is available on CRAN, at https:/cran.r-project.org/package=KSEAapp | 5492 - The package is available on CRAN, at https://cran.r-project.org/package=KSEAapp |
| 5339 - The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https:/doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https:/doi.org/10.1093/bioinformatics/btx415). | 5493 - The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https://doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https://doi.org/10.1093/bioinformatics/btx415). |
| 5340 - An online alternative (supporting only analysis of human data) is available at [https:/casecpb.shinyapps.io/ksea/](https:/casecpb.shinyapps.io/ksea/). | 5494 - An online alternative (supporting only analysis of human data) is available at [https://casecpb.shinyapps.io/ksea/](https://casecpb.shinyapps.io/ksea/). |
| 5341 | 5495 |
| 5342 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as: | 5496 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as: |
| 5343 | 5497 |
| 5344 $$ | 5498 $$ |
| 5345 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j} | 5499 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j} |
| 5459 ) | 5613 ) |
| 5460 ) | 5614 ) |
| 5461 kseaapp_input <- | 5615 kseaapp_input <- |
| 5462 sqldf::sqldf( | 5616 sqldf::sqldf( |
| 5463 x = sprintf(" | 5617 x = sprintf(" |
| 5464 SELECT `Protein`, `Gene`, `Peptide`, phosphopep AS `Residue.Both`, `p`, `FC` | 5618 SELECT `Protein`,`Gene`,`Peptide`,phosphopep AS `Residue.Both`,`p`,`FC` |
| 5465 FROM v_kseaapp_input | 5619 FROM v_kseaapp_input |
| 5466 WHERE contrast = %d | 5620 WHERE contrast = %d |
| 5467 ", | 5621 ", |
| 5468 i_cntrst | 5622 i_cntrst |
| 5469 ), | 5623 ), |
| 5501 drop = FALSE | 5655 drop = FALSE |
| 5502 ] | 5656 ] |
| 5503 } | 5657 } |
| 5504 | 5658 |
| 5505 if (FALSE) { | 5659 if (FALSE) { |
| 5660 if (print_nb_messages) nbe( | |
| 5661 "Output contents of `ksea_scores_rslt` table\n") | |
| 5662 cat_variable(ksea_scores_rslt) | |
| 5663 cat("\n\\clearpage\n") | |
| 5506 data_frame_tabbing_latex( | 5664 data_frame_tabbing_latex( |
| 5507 x = ksea_scores_rslt, | 5665 x = ksea_scores_rslt, |
| 5508 tabstops = c(0.8, 0.8, 0.8, 0.8, 0.8, 0.8), | 5666 tabstops = c(1.8, 0.8, 0.8, 0.3, 0.8, 0.8), |
| 5509 caption = paste("KSEA scores for contrast ", | 5667 caption = paste("KSEA scores for contrast ", |
| 5510 cntrst_b_level, "to", cntrst_a_level), | 5668 cntrst_b_level, "to", cntrst_a_level), |
| 5511 use_subsubsection_header = FALSE | 5669 use_subsubsection_header = FALSE |
| 5512 ) | 5670 ) |
| 5513 } | |
| 5514 | |
| 5515 if (FALSE) { | |
| 5516 if (print_nb_messages) nbe("Output contents of `ksea_scores_rslt` table\n") | |
| 5517 cat_variable(ksea_scores_rslt) | |
| 5518 cat("\n\\clearpage\n") | |
| 5519 } | 5671 } |
| 5520 | 5672 |
| 5521 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { | 5673 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { |
| 5522 next_index <- 1 + next_index | 5674 next_index <- 1 + next_index |
| 5523 rslt$score_list[[next_index]] <- ksea_scores_rslt | 5675 rslt$score_list[[next_index]] <- ksea_scores_rslt |
| 5625 ) | 5777 ) |
| 5626 sub_title <- contrast_longlabel | 5778 sub_title <- contrast_longlabel |
| 5627 tryCatch( | 5779 tryCatch( |
| 5628 expr = { | 5780 expr = { |
| 5629 ksea_scores_rslt <- rslt$score_list[[next_index]] | 5781 ksea_scores_rslt <- rslt$score_list[[next_index]] |
| 5630 if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) #ACE | 5782 if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) |
| 5631 | 5783 |
| 5632 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { | 5784 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { |
| 5633 sink(deferred <- file()) | 5785 sink(deferred <- file()) |
| 5634 ksea_low_fdr_print( | 5786 ksea_low_fdr_print( |
| 5635 rslt = rslt, | 5787 rslt = rslt, |
| 5758 loaded_packages_df <- data.frame( | 5910 loaded_packages_df <- data.frame( |
| 5759 package = loaded_packages_df$package, | 5911 package = loaded_packages_df$package, |
| 5760 version = loaded_packages_df$loadedversion, | 5912 version = loaded_packages_df$loadedversion, |
| 5761 date = loaded_packages_df$date | 5913 date = loaded_packages_df$date |
| 5762 ) | 5914 ) |
| 5763 #ACE cat("\\clearpage\n\\section{R package versions}\n") | |
| 5764 #ACE data_frame_tabbing_latex( | |
| 5765 #ACE x = loaded_packages_df, | |
| 5766 #ACE tabstops = c(2.5, 1.25), | |
| 5767 #ACE caption = "R package versions" | |
| 5768 #ACE ) | |
| 5769 cat("\\clearpage\n\\section{Input parameter settings}\n") | 5915 cat("\\clearpage\n\\section{Input parameter settings}\n") |
| 5770 data_frame_tabbing_latex( | 5916 data_frame_tabbing_latex( |
| 5771 x = param_df, | 5917 x = param_df, |
| 5772 tabstops = c(1.75), | 5918 tabstops = c(1.75), |
| 5773 underscore_whack = TRUE, | 5919 underscore_whack = TRUE, |
| 5784 knitr::knit_exit() | 5930 knitr::knit_exit() |
| 5785 } | 5931 } |
| 5786 ``` | 5932 ``` |
| 5787 | 5933 |
| 5788 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'} | 5934 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'} |
| 5935 # nolint start | |
| 5936 # squash un-actionable cyclomatic_complexity warning | |
| 5789 if (have_kinase_descriptions) { | 5937 if (have_kinase_descriptions) { |
| 5938 # nolint end | |
| 5790 my_section_header <- | 5939 my_section_header <- |
| 5791 sprintf( | 5940 sprintf( |
| 5792 "inases whose KSEA %s < %s\n", | 5941 "inases whose KSEA %s < %s\n", |
| 5793 ksea_cutoff_statistic, | 5942 ksea_cutoff_statistic, |
| 5794 signif(ksea_cutoff_threshold, 2) | 5943 signif(ksea_cutoff_threshold, 2) |
| 5824 caption = paste0("Descriptions of k", my_section_header) | 5973 caption = paste0("Descriptions of k", my_section_header) |
| 5825 ) | 5974 ) |
| 5826 } | 5975 } |
| 5827 | 5976 |
| 5828 if (FALSE) { | 5977 if (FALSE) { |
| 5829 cat_variable(sqldf("SELECT kinase FROM enriched_kinases")) | 5978 cat_variable(sqldf::sqldf("SELECT kinase FROM enriched_kinases")) |
| 5830 cat_variable(sqldf(" | 5979 cat_variable(sqldf::sqldf(" |
| 5831 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep | 5980 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep |
| 5832 FROM pseudo_ksdata | 5981 FROM pseudo_ksdata |
| 5833 WHERE gene IN (SELECT kinase FROM enriched_kinases) | 5982 WHERE gene IN (SELECT kinase FROM enriched_kinases) |
| 5834 ")) | 5983 ")) |
| 5835 data_frame_table_latex( | 5984 data_frame_table_latex( |
| 5836 x = sqldf(" | 5985 x = sqldf::sqldf(" |
| 5837 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep | 5986 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep |
| 5838 FROM pseudo_ksdata | 5987 FROM pseudo_ksdata |
| 5839 WHERE gene IN (SELECT kinase FROM enriched_kinases) | 5988 WHERE gene IN (SELECT kinase FROM enriched_kinases) |
| 5840 "), | 5989 "), |
| 5841 justification = "l l l", | 5990 justification = "l l l", |
| 5843 caption = "substrates of enriched kinases", | 5992 caption = "substrates of enriched kinases", |
| 5844 anchor = c(const_table_anchor_p, const_table_anchor_t), | 5993 anchor = c(const_table_anchor_p, const_table_anchor_t), |
| 5845 underscore_whack = TRUE | 5994 underscore_whack = TRUE |
| 5846 ) | 5995 ) |
| 5847 data_frame_table_latex( | 5996 data_frame_table_latex( |
| 5848 x = sqldf(" | 5997 x = sqldf::sqldf(" |
| 5849 SELECT | 5998 SELECT |
| 5850 gene AS kinase, | 5999 gene AS kinase, |
| 5851 ppep, | 6000 ppep, |
| 5852 sub_gene, | 6001 sub_gene, |
| 5853 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, | 6002 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, |
| 5869 caption = "labeled substrates of enriched kinases", | 6018 caption = "labeled substrates of enriched kinases", |
| 5870 anchor = c(const_table_anchor_p, const_table_anchor_t), | 6019 anchor = c(const_table_anchor_p, const_table_anchor_t), |
| 5871 underscore_whack = TRUE | 6020 underscore_whack = TRUE |
| 5872 ) | 6021 ) |
| 5873 } | 6022 } |
| 5874 all_enriched_substrates <- sqldf(" | 6023 all_enriched_substrates <- sqldf::sqldf(" |
| 5875 SELECT | 6024 SELECT |
| 5876 gene AS kinase, | 6025 gene AS kinase, |
| 5877 ppep, | 6026 ppep, |
| 5878 sub_gene, | 6027 sub_gene, |
| 5879 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, | 6028 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, |
| 5897 , | 6046 , |
| 5898 drop = FALSE | 6047 drop = FALSE |
| 5899 ] | 6048 ] |
| 5900 | 6049 |
| 5901 all_enriched_substrates$sub_gene <- | 6050 all_enriched_substrates$sub_gene <- |
| 5902 sub( | 6051 sub(" ///.*", "...", |
| 5903 " ///.*", | |
| 5904 " ...", | |
| 5905 all_enriched_substrates$sub_gene | 6052 all_enriched_substrates$sub_gene |
| 5906 ) | 6053 ) |
| 5907 | 6054 |
| 5908 all_enriched_substrates$label <- | 6055 all_enriched_substrates$label <- |
| 5909 with( | 6056 with( |
| 5926 if (g_neednewpage) cat("\\newpage\n") | 6073 if (g_neednewpage) cat("\\newpage\n") |
| 5927 g_neednewpage <- TRUE | 6074 g_neednewpage <- TRUE |
| 5928 if (nrow(m) > g_intensity_hm_rows) { | 6075 if (nrow(m) > g_intensity_hm_rows) { |
| 5929 cat(subsection_header( | 6076 cat(subsection_header( |
| 5930 sprintf( | 6077 sprintf( |
| 5931 "Highest-quality %d (of %d) enriched %s-substrates", | 6078 "Highest-quality %d (of %d) enriched %s-sites", |
| 5932 g_intensity_hm_rows, | 6079 g_intensity_hm_rows, |
| 5933 nrow(m), | 6080 nrow(m), |
| 5934 kinase | 6081 kinase |
| 5935 ) | 6082 ) |
| 5936 )) | 6083 )) |
| 5939 return(FALSE) | 6086 return(FALSE) |
| 5940 } else { | 6087 } else { |
| 5941 nrow_m <- nrow(m) | 6088 nrow_m <- nrow(m) |
| 5942 cat(subsection_header( | 6089 cat(subsection_header( |
| 5943 sprintf( | 6090 sprintf( |
| 5944 "%d enriched %s-substrate%s", | 6091 "%d enriched %s-site%s", |
| 5945 nrow_m, | 6092 nrow_m, |
| 5946 kinase, | 6093 kinase, |
| 5947 if (nrow_m > 1) "s" else "" | 6094 if (nrow_m > 1) "s" else "" |
| 5948 ) | 6095 ) |
| 5949 )) | 6096 )) |
| 6023 | 6170 |
| 6024 } | 6171 } |
| 6025 ``` | 6172 ``` |
| 6026 | 6173 |
| 6027 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'} | 6174 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'} |
| 6175 # nolint start | |
| 6176 # squash un-actionable cyclomatic_complexity warning | |
| 6028 if (g_can_run_ksea) { | 6177 if (g_can_run_ksea) { |
| 6178 # nolint end | |
| 6029 g_did_enriched_header <- FALSE | 6179 g_did_enriched_header <- FALSE |
| 6030 for (kinase_name in sort(enriched_kinases$kinase)) { | 6180 for (kinase_name in sort(enriched_kinases$kinase)) { |
| 6031 enriched_substrates <- | 6181 enriched_substrates <- |
| 6032 all_enriched_substrates[ | 6182 all_enriched_substrates[ |
| 6033 all_enriched_substrates$kinase == kinase_name, | 6183 all_enriched_substrates$kinase == kinase_name, |
| 6046 ten_trunc_ppep | 6196 ten_trunc_ppep |
| 6047 ) | 6197 ) |
| 6048 ) | 6198 ) |
| 6049 # Get the intensity values for the heatmap | 6199 # Get the intensity values for the heatmap |
| 6050 enriched_intensities <- | 6200 enriched_intensities <- |
| 6051 as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE]) | 6201 as.matrix(unimputed_quant_data_log[ |
| 6202 enriched_substrates$ppep, , drop = FALSE | |
| 6203 ]) | |
| 6052 | 6204 |
| 6053 # Remove rows having too many NA values to be relevant | 6205 # Remove rows having too many NA values to be relevant |
| 6054 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0) | 6206 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0) |
| 6055 #ACE nbe(see_variable(good_rows), "\n") | 6207 |
| 6056 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE] | 6208 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE] |
| 6057 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE] | 6209 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE] |
| 6058 | 6210 |
| 6059 # Rename the rows with the display-name for the heatmap | 6211 # Rename the rows with the display-name for the heatmap |
| 6060 short_row_names <- sub( | 6212 short_row_names <- sub( |
| 6074 ) | 6226 ) |
| 6075 rownames(enriched_intensities) <- long_row_names | 6227 rownames(enriched_intensities) <- long_row_names |
| 6076 # Format as matrix for heatmap | 6228 # Format as matrix for heatmap |
| 6077 m <- as.matrix(enriched_intensities) | 6229 m <- as.matrix(enriched_intensities) |
| 6078 rownames(m) <- trunc_enriched_substrate(rownames(m)) | 6230 rownames(m) <- trunc_enriched_substrate(rownames(m)) |
| 6079 | |
| 6080 #ACE nb("m with bad rows: ", see_variable(m), "\n") | |
| 6081 #ACE good_rows <- (rowSums(m, na.rm = TRUE) != 0) | |
| 6082 #ACE nb(see_variable(good_rows), "\n") | |
| 6083 #ACE m <- m[good_rows, , drop = FALSE] | |
| 6084 #ACE nb("m without(?) bad rows: ", see_variable(m), "\n") | |
| 6085 #ACE nb(see_variable(short_row_names), "\n") | |
| 6086 #ACE local_short_row_names <- short_row_names[good_rows] | |
| 6087 #ACE local_long_row_names <- long_row_names[good_rows] | |
| 6088 #ACE local_enriched_intensities <- enriched_intensities[local_long_row_names, ] | |
| 6089 | 6231 |
| 6090 # Draw the heading and heatmap | 6232 # Draw the heading and heatmap |
| 6091 nrow_m <- nrow(m) | 6233 nrow_m <- nrow(m) |
| 6092 if (nrow_m > 0) { | 6234 if (nrow_m > 0) { |
| 6093 if (!g_did_enriched_header) { | 6235 if (!g_did_enriched_header) { |
| 6096 g_did_enriched_header <- TRUE | 6238 g_did_enriched_header <- TRUE |
| 6097 } | 6239 } |
| 6098 is_na_m <- is.na(m) | 6240 is_na_m <- is.na(m) |
| 6099 cellnote_m <- is_na_m | 6241 cellnote_m <- is_na_m |
| 6100 cellnote_m[!is_na_m] <- "" | 6242 cellnote_m[!is_na_m] <- "" |
| 6101 cellnote_m[is_na_m] <- "NA" | 6243 cellnote_m[is_na_m] <- params$heatMapNAcellNote |
| 6102 cut_args <- new_env() | 6244 cut_args <- new_env() |
| 6103 cut_args$cutoff <- cutoff | 6245 cut_args$cutoff <- cutoff |
| 6104 cut_args$kinase <- kinase_name | 6246 cut_args$kinase <- kinase_name |
| 6105 cut_args$statistic <- ksea_cutoff_statistic | 6247 cut_args$statistic <- ksea_cutoff_statistic |
| 6106 cut_args$threshold <- ksea_cutoff_threshold | 6248 cut_args$threshold <- ksea_cutoff_threshold |
| 6108 ppep_heatmap( | 6250 ppep_heatmap( |
| 6109 m = m, | 6251 m = m, |
| 6110 cellnote = cellnote_m, | 6252 cellnote = cellnote_m, |
| 6111 cutoff = cut_args, | 6253 cutoff = cut_args, |
| 6112 hm_heading_function = cat_enriched_heading, | 6254 hm_heading_function = cat_enriched_heading, |
| 6113 hm_main_title | 6255 hm_main_title = paste( |
| 6114 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", | 6256 "Unnormalized (zero-imputed)", |
| 6257 "intensities of enriched kinase-substrates"), | |
| 6115 suppress_row_dendrogram = FALSE, | 6258 suppress_row_dendrogram = FALSE, |
| 6116 master_cex = 0.35, | 6259 master_cex = 0.35, |
| 6117 sepcolor = "black", | 6260 sepcolor = "black", |
| 6118 colsep = sample_colsep, | 6261 colsep = sample_colsep, |
| 6119 row_scaling = "none" | 6262 row_scaling = "none" |
| 6121 if (number_of_peptides_found > 1) { | 6264 if (number_of_peptides_found > 1) { |
| 6122 | 6265 |
| 6123 tryCatch( | 6266 tryCatch( |
| 6124 { | 6267 { |
| 6125 rownames(m) <- short_row_names | 6268 rownames(m) <- short_row_names |
| 6126 cov_heatmap(m, nrow_m > g_intensity_hm_rows) | 6269 cov_heatmap( |
| 6270 m = m, | |
| 6271 kinase_name = kinase_name, | |
| 6272 top_substrates = nrow_m > g_intensity_hm_rows | |
| 6273 ) | |
| 6127 }, | 6274 }, |
| 6128 error = function(e) { | 6275 error = function(e) { |
| 6129 cat( | 6276 cat( |
| 6130 sprintf( | 6277 sprintf( |
| 6131 "ERROR: %s\n\\newline\n", | 6278 "ERROR: %s\n\\newline\n", |
| 6160 headers_1st_line <- c("", "", "ANOVA", "min(values", "") | 6307 headers_1st_line <- c("", "", "ANOVA", "min(values", "") |
| 6161 if (1 < nrow(enriched_substrates)) | 6308 if (1 < nrow(enriched_substrates)) |
| 6162 cat("\n\\newpage\n") | 6309 cat("\n\\newpage\n") |
| 6163 cat(subsubsection_header( | 6310 cat(subsubsection_header( |
| 6164 sprintf( | 6311 sprintf( |
| 6165 "Details for %s%s-substrates", | 6312 "Details for %s%s-sites", |
| 6166 if (excess_substrates) | 6313 if (excess_substrates) |
| 6167 sprintf( | 6314 sprintf( |
| 6168 "%s \"highest quality\" ", | 6315 "%s \"highest quality\" ", |
| 6169 g_intensity_hm_rows | 6316 g_intensity_hm_rows |
| 6170 ) | 6317 ) |
| 6188 | 6335 |
| 6189 # get kinase, ppep, concat(kinase) tuples for enriched kinases | 6336 # get kinase, ppep, concat(kinase) tuples for enriched kinases |
| 6190 | 6337 |
| 6191 if (print_nb_messages) nb("kinase_ppep_label <- ...\n") | 6338 if (print_nb_messages) nb("kinase_ppep_label <- ...\n") |
| 6192 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n") | 6339 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n") |
| 6193 kinase_ppep_label <- sqldf(" | 6340 kinase_ppep_label <- sqldf::sqldf(" |
| 6194 WITH | 6341 WITH |
| 6195 t(ppep, label) AS | 6342 t(ppep, label) AS |
| 6196 ( | 6343 ( |
| 6197 SELECT DISTINCT | 6344 SELECT DISTINCT |
| 6198 SUB_MOD_RSD AS ppep, | 6345 SUB_MOD_RSD AS ppep, |
| 6228 ON f.Phosphopeptide = k.ppep, | 6375 ON f.Phosphopeptide = k.ppep, |
| 6229 impish q | 6376 impish q |
| 6230 WHERE | 6377 WHERE |
| 6231 f.Phosphopeptide = q.Phosphopeptide | 6378 f.Phosphopeptide = q.Phosphopeptide |
| 6232 " | 6379 " |
| 6233 data_table_imputed <- sqldf(data_table_imputed_sql) | 6380 data_table_imputed <- sqldf::sqldf(data_table_imputed_sql) |
| 6234 # Zap the duplicated 'Phosphopeptide' column named 'ppep' | 6381 # Zap the duplicated 'Phosphopeptide' column named 'ppep' |
| 6235 data_table_imputed <- | 6382 data_table_imputed <- |
| 6236 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] | 6383 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] |
| 6237 | 6384 |
| 6238 # Output imputed, un-normalized data | 6385 # Output imputed, un-normalized data |
| 6239 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n") | 6386 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n") |
| 6240 if (print_nb_messages) nbe("Output imputed, un-normalized data tabular file\n") | 6387 if (print_nb_messages) nbe( |
| 6388 "Output imputed, un-normalized data tabular file\n") | |
| 6241 write.table( | 6389 write.table( |
| 6242 data_table_imputed | 6390 data_table_imputed |
| 6243 , file = imputed_data_filename | 6391 , file = imputed_data_filename |
| 6244 , sep = "\t" | 6392 , sep = "\t" |
| 6245 , col.names = TRUE | 6393 , col.names = TRUE |
| 6249 | 6397 |
| 6250 | 6398 |
| 6251 #output quantile normalized data | 6399 #output quantile normalized data |
| 6252 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log) | 6400 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log) |
| 6253 colnames(impish)[1] <- "Phosphopeptide" | 6401 colnames(impish)[1] <- "Phosphopeptide" |
| 6254 data_table_imputed <- sqldf(data_table_imputed_sql) | 6402 data_table_imputed <- sqldf::sqldf(data_table_imputed_sql) |
| 6255 # Zap the duplicated 'Phosphopeptide' column named 'ppep' | 6403 # Zap the duplicated 'Phosphopeptide' column named 'ppep' |
| 6256 data_table_imputed <- | 6404 data_table_imputed <- |
| 6257 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] | 6405 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] |
| 6258 if (print_nb_messages) nb("Output quantile normalized data tabular file\n") | 6406 if (print_nb_messages) nb("Output quantile normalized data tabular file\n") |
| 6259 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n") | 6407 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n") |
| 6264 col.names = TRUE, | 6412 col.names = TRUE, |
| 6265 row.names = FALSE, | 6413 row.names = FALSE, |
| 6266 quote = FALSE | 6414 quote = FALSE |
| 6267 ) | 6415 ) |
| 6268 | 6416 |
| 6269 ppep_kinase <- sqldf(" | 6417 ppep_kinase <- sqldf::sqldf(" |
| 6270 SELECT DISTINCT k.ppep, k.kinase | 6418 SELECT DISTINCT k.ppep, k.kinase |
| 6271 FROM ( | 6419 FROM ( |
| 6272 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep | 6420 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep |
| 6273 FROM pseudo_ksdata | 6421 FROM pseudo_ksdata |
| 6274 WHERE GENE IN (SELECT kinase FROM enriched_kinases) | 6422 WHERE GENE IN (SELECT kinase FROM enriched_kinases) |
| 6312 ON m.phospho_peptide = kek.ppep | 6460 ON m.phospho_peptide = kek.ppep |
| 6313 ; | 6461 ; |
| 6314 " | 6462 " |
| 6315 ) | 6463 ) |
| 6316 | 6464 |
| 6317 if (print_nb_messages) nb("Output contents of `stats_metadata_v` table to tabular file\n") | 6465 if (print_nb_messages) nb( |
| 6318 if (print_nb_messages) nbe("Output contents of `stats_metadata_v` table to tabular file\n") | 6466 "Output contents of `stats_metadata_v` table to tabular file\n") |
| 6467 if (print_nb_messages) nbe( | |
| 6468 "Output contents of `stats_metadata_v` table to tabular file\n") | |
| 6469 | |
| 6319 write.table( | 6470 write.table( |
| 6320 dbReadTable(db, "stats_metadata_v"), | 6471 dbReadTable(db, "stats_metadata_v"), |
| 6321 file = anova_ksea_mtdt_file, | 6472 file = anova_ksea_mtdt_file, |
| 6322 sep = "\t", | 6473 sep = "\t", |
| 6323 col.names = TRUE, | 6474 col.names = TRUE, |
