Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 7:ca9938f2eb6a draft default tip
"planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 5fd9687d543a48a715b1180caf93abebebd58b0e"
author | eschen42 |
---|---|
date | Tue, 17 Nov 2020 23:29:59 +0000 |
parents | 0b49916c5c52 |
children |
comparison
equal
deleted
inserted
replaced
6:0b49916c5c52 | 7:ca9938f2eb6a |
---|---|
1 # center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/ | 1 # compute and output detail plots |
2 center_colmeans <- function(x) { | |
3 xcenter = colMeans(x) | |
4 x - rep(xcenter, rep.int(nrow(x), ncol(x))) | |
5 } | |
6 | |
7 #### OPLS-DA | |
8 algoC <- "nipals" | |
9 | |
10 do_detail_plot <- function( | 2 do_detail_plot <- function( |
11 x_dataMatrix | 3 x_dataMatrix |
12 , x_predictor | 4 , x_predictor |
13 , x_is_match | 5 , x_is_match |
14 , x_algorithm | 6 , x_algorithm |
19 , x_crossval_i | 11 , x_crossval_i |
20 ) { | 12 ) { |
21 off <- function(x) if (x_show_labels == "0") 0 else x | 13 off <- function(x) if (x_show_labels == "0") 0 else x |
22 if ( x_is_match | 14 if ( x_is_match |
23 && ncol(x_dataMatrix) > 0 | 15 && ncol(x_dataMatrix) > 0 |
24 && length(unique(x_predictor))> 1 | 16 && length(unique(x_predictor)) > 1 |
25 && x_crossval_i < nrow(x_dataMatrix) | 17 && x_crossval_i < nrow(x_dataMatrix) |
26 ) { | 18 ) { |
27 my_oplsda <- opls( | 19 my_oplsda <- opls( |
28 x = x_dataMatrix | 20 x = x_dataMatrix |
29 , y = x_predictor | 21 , y = x_predictor |
30 , algoC = x_algorithm | 22 , algoC = x_algorithm |
31 , predI = 1 | 23 , predI = 1 |
32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 | 24 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 |
33 , printL = FALSE | 25 , fig.pdfC = 'none' |
34 , plotL = FALSE | 26 , info.txtC = 'none' |
35 , crossvalI = x_crossval_i | 27 , crossvalI = x_crossval_i |
36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. | 28 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. |
37 ) | 29 ) |
38 # strip out variables having negligible variance | 30 # strip out variables having negligible variance |
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] | 31 x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE] |
40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 32 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) |
41 # x_progress(strF(x_dataMatrix)) | 33 |
42 # x_progress(strF(my_oplsda)) | |
43 #x_progress(head(my_oplsda_suppLs_y_levels)) | |
44 #x_progress(unique(my_oplsda_suppLs_y_levels)) | |
45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 34 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] |
46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 35 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] |
47 do_s_plot <- function( | 36 do_s_plot <- function( |
48 x_env | 37 x_env |
49 , predictor_projection_x = TRUE | 38 , predictor_projection_x = TRUE |
50 , cplot_x = FALSE | 39 , cplot_x = FALSE |
51 , cor_vs_cov_x = NULL | 40 , cor_vs_cov_x = NULL |
52 ) | 41 ) { |
53 { | |
54 #print(ls(x_env)) # "cplot_y" etc | |
55 #print(str(x_env$cplot_y)) # chr "covariance" | |
56 if (cplot_x) { | 42 if (cplot_x) { |
57 #print(x_env$cplot_y) # "covariance" | |
58 cplot_y_correlation <- (x_env$cplot_y == "correlation") | 43 cplot_y_correlation <- (x_env$cplot_y == "correlation") |
59 #print(cplot_y_correlation) # FALSE | 44 default_lim_x <- 10 |
45 } else { | |
46 default_lim_x <- 1.2 | |
60 } | 47 } |
61 if (is.null(cor_vs_cov_x)) { | 48 if (is.null(cor_vs_cov_x)) { |
62 my_cor_vs_cov <- cor_vs_cov( | 49 my_cor_vs_cov <- cor_vs_cov( |
63 matrix_x = x_dataMatrix | 50 matrix_x = x_dataMatrix |
64 , ropls_x = my_oplsda | 51 , ropls_x = my_oplsda |
65 , predictor_projection_x = predictor_projection_x | 52 , predictor_projection_x = predictor_projection_x |
66 , x_progress | 53 , x_progress |
54 , x_env | |
67 ) | 55 ) |
68 } else { | 56 } else { |
69 my_cor_vs_cov <- cor_vs_cov_x | 57 my_cor_vs_cov <- cor_vs_cov_x |
70 } | 58 } |
71 # print("str(my_cor_vs_cov)") | 59 |
72 # str(my_cor_vs_cov) | |
73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { | 60 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { |
74 if (is.null(cor_vs_cov_x)) { | 61 if (is.null(cor_vs_cov_x)) { |
75 x_progress("No cor_vs_cov data produced") | 62 x_progress( |
63 sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2) | |
64 ) | |
76 } | 65 } |
77 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 66 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
78 text(x=1, y=1, labels="too few covariance data") | 67 text(x=1, y=1, labels="too few covariance data") |
79 return(my_cor_vs_cov) | 68 return(my_cor_vs_cov) |
80 } | 69 } |
82 my_cor_vs_cov | 71 my_cor_vs_cov |
83 , { | 72 , { |
84 min_x <- min(covariance, na.rm = TRUE) | 73 min_x <- min(covariance, na.rm = TRUE) |
85 max_x <- max(covariance, na.rm = TRUE) | 74 max_x <- max(covariance, na.rm = TRUE) |
86 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | 75 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) |
87 covariance <- covariance / lim_x | 76 |
88 lim_x <- 1.2 | 77 # Regarding using VIP as a guide to selecting a biomarker: |
89 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | 78 # "It is generally accepted that a variable should be selected if vj>1, [27–29], |
90 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | 79 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." |
91 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | 80 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) |
92 plus_cor <- correlation | 81 plus_cor <- correlation |
93 plus_cov <- covariance | 82 plus_cov <- covariance |
94 cex <- 0.65 | 83 if (length(plus_cor) != 0 && length(plus_cor) != 0) { |
95 which_projection <- if (projection == 1) "t1" else "o1" | 84 cex <- 0.65 |
96 which_loading <- if (projection == 1) "parallel" else "orthogonal" | 85 if (projection == 1) { |
97 if (projection == 1) { # predictor-projection | 86 # predictor-projection |
98 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | 87 vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83))) |
99 if (!cplot_x) { # S-plot predictor-projection | 88 if (!cplot_x) { |
100 my_xlab <- "relative covariance(feature,t1)" | 89 # S-plot predictor-projection |
101 my_x <- plus_cov | 90 my_xlab <- "covariance(feature,t1)" |
102 my_ylab <- "correlation(feature,t1)" | 91 my_x <- plus_cov |
103 my_y <- plus_cor | |
104 } else { # C-plot predictor-projection | |
105 my_xlab <- "variable importance in predictor-projection" | |
106 my_x <- vip4p | |
107 if (cplot_y_correlation) { | |
108 my_ylab <- "correlation(feature,t1)" | 92 my_ylab <- "correlation(feature,t1)" |
109 my_y <- plus_cor | 93 my_y <- plus_cor |
94 # X,Y limits for S-PLOT | |
95 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) | |
96 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
110 } else { | 97 } else { |
111 my_ylab <- "relative covariance(feature,t1)" | 98 # C-plot predictor-projection |
112 my_y <- plus_cov | 99 my_xlab <- "variable importance in predictor-projection" |
100 my_x <- vip4p | |
101 if (cplot_y_correlation) { | |
102 my_ylab <- "correlation(feature,t1)" | |
103 my_y <- plus_cor | |
104 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
105 } else { | |
106 my_ylab <- "covariance(feature,t1)" | |
107 my_y <- plus_cov | |
108 my_ylim <- max(abs(plus_cov)) | |
109 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) | |
110 } | |
111 # X,Y limits for C-plot | |
112 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
113 lim_x <- min(lim_x, default_lim_x) | |
114 my_xlim <- c( 0, lim_x ) # + off(0.2) ) | |
113 } | 115 } |
114 } | 116 my_load_distal <- loadp |
115 if (cplot_x) { | 117 my_load_proximal <- loado |
116 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | 118 red <- as.numeric(correlation > 0) * vipcp |
117 my_xlim <- c( 0, lim_x + off(0.2) ) | 119 blue <- as.numeric(correlation < 0) * vipcp |
120 alpha <- 0.1 + 0.4 * vipcp | |
121 red[is.na(red)] <- 0 | |
122 blue[is.na(blue)] <- 0 | |
123 alpha[is.na(alpha)] <- 0 | |
124 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
125 main_label <- sprintf("%s for level %s versus %s" | |
126 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
118 } else { | 127 } else { |
119 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | 128 # orthogonal projection |
120 } | 129 vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83))) |
121 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 130 if (!cplot_x) { |
122 my_load_distal <- loadp | 131 # S-PLOT orthogonal-projection |
123 my_load_proximal <- loado | 132 my_xlab <- "covariance(feature,to1)" |
124 red <- as.numeric(correlation > 0) * vipcp | 133 my_x <- -plus_cov |
125 blue <- as.numeric(correlation < 0) * vipcp | 134 # X,Y limits for S-PLOT |
126 alpha <- 0.1 + 0.4 * vipcp | 135 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) |
127 red[is.na(red)] <- 0 | |
128 blue[is.na(blue)] <- 0 | |
129 alpha[is.na(alpha)] <- 0 | |
130 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
131 main_label <- sprintf("%s for level %s versus %s" | |
132 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
133 } else { # orthogonal projection | |
134 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) | |
135 if (!cplot_x) { | |
136 my_xlab <- "relative covariance(feature,to1)" | |
137 my_x <- -plus_cov | |
138 } else { | |
139 my_xlab <- "variable importance in orthogonal projection" | |
140 my_x <- vip4o | |
141 } | |
142 if (!cplot_x) { # S-plot orthogonal projection | |
143 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
144 my_ylab <- "correlation(feature,to1)" | |
145 my_y <- plus_cor | |
146 } else { # C-plot orthogonal projection | |
147 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
148 my_xlim <- c( 0, lim_x + off(0.2) ) | |
149 if (cplot_y_correlation) { | |
150 my_ylab <- "correlation(feature,to1)" | 136 my_ylab <- "correlation(feature,to1)" |
151 my_y <- plus_cor | 137 my_y <- plus_cor |
138 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
152 } else { | 139 } else { |
153 my_ylab <- "relative covariance(feature,to1)" | 140 # C-plot orthogonal-projection |
154 my_y <- plus_cov | 141 my_xlab <- "variable importance in orthogonal projection" |
142 my_x <- vip4o | |
143 # C-plot orthogonal projection | |
144 # X,Y limits for C-plot | |
145 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
146 my_xlim <- c( 0, lim_x ) # + off(0.2) ) | |
147 if (cplot_y_correlation) { | |
148 my_ylab <- "correlation(feature,to1)" | |
149 my_y <- plus_cor | |
150 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
151 } else { | |
152 my_ylab <- "covariance(feature,to1)" | |
153 my_y <- plus_cov | |
154 my_ylim <- max(abs(plus_cov)) | |
155 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) | |
156 } | |
155 } | 157 } |
158 my_load_distal <- loado | |
159 my_load_proximal <- loadp | |
160 alpha <- 0.1 + 0.4 * vipco | |
161 alpha[is.na(alpha)] <- 0 | |
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | |
163 main_label <- sprintf( | |
164 "Features influencing orthogonal projection for %s versus %s" | |
165 , fctr_lvl_1, fctr_lvl_2) | |
156 } | 166 } |
157 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 167 main_cex <- min(1.0, 46.0/nchar(main_label)) |
158 my_load_distal <- loado | 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward |
159 my_load_proximal <- loadp | 169 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) |
160 alpha <- 0.1 + 0.4 * vipco | 170 if ( sum(is.infinite(my_xlim)) == 0 ) { |
161 alpha[is.na(alpha)] <- 0 | 171 plot( |
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | 172 y = my_y |
163 main_label <- sprintf( | 173 , x = my_x |
164 "Features influencing orthogonal projection for %s versus %s" | 174 , type = "p" |
165 , fctr_lvl_1, fctr_lvl_2) | 175 , xlim = my_xlim |
166 } | 176 , ylim = my_ylim |
167 main_cex <- min(1.0, 46.0/nchar(main_label)) | 177 , xlab = my_xlab |
168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward | 178 , ylab = my_ylab |
169 plot( | 179 , main = main_label |
170 y = my_y | 180 , cex.main = main_cex |
171 , x = my_x | 181 , cex = cex |
172 , type = "p" | 182 , pch = my_pch |
173 , xlim = my_xlim | 183 , col = my_col |
174 , ylim = my_ylim | |
175 , xlab = my_xlab | |
176 , ylab = my_ylab | |
177 , main = main_label | |
178 , cex.main = main_cex | |
179 , cex = cex | |
180 , pch = 16 | |
181 , col = my_col | |
182 ) | |
183 low_x <- -0.7 * lim_x | |
184 high_x <- 0.7 * lim_x | |
185 if (projection == 1 && !cplot_x) { | |
186 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | |
187 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | |
188 } | |
189 if ( x_show_labels != "0" ) { | |
190 names(my_load_distal) <- tsv1$featureID | |
191 names(my_load_proximal) <- tsv1$featureID | |
192 if ( x_show_labels == "ALL" ) { | |
193 n_labels <- length(my_load_distal) | |
194 } else { | |
195 n_labels <- as.numeric(x_show_labels) | |
196 } | |
197 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
198 labels_to_show <- c( | |
199 names(head(sort(my_load_distal),n = n_labels)) | |
200 , names(tail(sort(my_load_distal),n = n_labels)) | |
201 ) | |
202 labels <- unname( | |
203 sapply( | |
204 X = tsv1$featureID | |
205 , FUN = function(x) if( x %in% labels_to_show ) x else "" | |
206 ) | 184 ) |
207 ) | 185 low_x <- -0.7 * lim_x |
208 x_text_offset <- 0.024 | 186 high_x <- 0.7 * lim_x |
209 y_text_off <- 0.017 | 187 if (projection == 1 && !cplot_x) { |
210 if (!cplot_x) { # S-plot | 188 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") |
211 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | 189 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") |
212 } else { # C-plot | 190 } |
213 y_text_offset <- | 191 if ( x_show_labels != "0" ) { |
214 sapply( | 192 names(my_load_distal) <- tsv1$featureID |
215 X = (my_y > 0) | 193 names(my_load_proximal) <- tsv1$featureID |
216 , FUN = function(x) { if (x) y_text_off else -y_text_off } | 194 if ( x_show_labels == "ALL" ) { |
195 n_labels <- length(my_load_distal) | |
196 } else { | |
197 n_labels <- as.numeric(x_show_labels) | |
198 } | |
199 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
200 labels_to_show <- c( | |
201 names(head(sort(my_load_distal), n = n_labels)) | |
202 , names(tail(sort(my_load_distal), n = n_labels)) | |
217 ) | 203 ) |
218 } | 204 labels <- unname( |
219 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | 205 sapply( |
220 # print("str(x_arg)") | 206 X = tsv1$featureID |
221 # print(str(x_arg)) | 207 , FUN = function(x) if ( x %in% labels_to_show ) x else "" |
222 # print("str(y_arg)") | |
223 # print(str(y_arg)) | |
224 # print("str(labels_arg)") | |
225 # print(str(labels_arg)) | |
226 if (length(labels_arg) > 0) { | |
227 unique_slant <- unique(slant_arg) | |
228 if (length(unique_slant) == 1) { | |
229 text( | |
230 y = y_arg | |
231 , x = x_arg + x_text_offset | |
232 , cex = 0.4 | |
233 , labels = labels_arg | |
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
235 , srt = slant_arg | |
236 , adj = 0 # left-justified | |
237 ) | 208 ) |
209 ) | |
210 x_text_offset <- 0.024 | |
211 y_text_off <- 0.017 | |
212 if (!cplot_x) { | |
213 # S-plot | |
214 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | |
238 } else { | 215 } else { |
239 for (slant in unique_slant) { | 216 # C-plot |
240 text( | 217 y_text_offset <- |
241 y = y_arg[slant_arg == slant] | 218 sapply( |
242 , x = x_arg[slant_arg == slant] + x_text_offset | 219 X = (my_y > 0) |
243 , cex = 0.4 | 220 , FUN = function(x) { |
244 , labels = labels_arg[slant_arg == slant] | 221 if (x) y_text_off else -y_text_off |
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | 222 } |
246 , srt = slant | 223 ) |
247 , adj = 0 # left-justified | 224 } |
225 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | |
226 if (length(labels_arg) > 0) { | |
227 unique_slant <- unique(slant_arg) | |
228 if (length(unique_slant) == 1) { | |
229 text( | |
230 y = y_arg | |
231 , x = x_arg + x_text_offset | |
232 , cex = 0.4 | |
233 , labels = labels_arg | |
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
235 , srt = slant_arg | |
236 , adj = 0 # left-justified | |
237 ) | |
238 } else { | |
239 for (slant in unique_slant) { | |
240 text( | |
241 y = y_arg[slant_arg == slant] | |
242 , x = x_arg[slant_arg == slant] + x_text_offset | |
243 , cex = 0.4 | |
244 , labels = labels_arg[slant_arg == slant] | |
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
246 , srt = slant | |
247 , adj = 0 # left-justified | |
248 ) | |
249 } | |
250 } | |
251 } | |
252 } | |
253 if (!cplot_x) { | |
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | |
255 } else { | |
256 my_slant <- sapply( | |
257 X = (my_y > 0) | |
258 , FUN = function(x) if (x) 2 else -2 | |
259 ) * my_feature_label_slant | |
260 } | |
261 if (length(my_x) > 1) { | |
262 label_features( | |
263 x_arg = my_x [my_x > 0] | |
264 , y_arg = my_y [my_x > 0] - y_text_offset | |
265 , labels_arg = labels[my_x > 0] | |
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
267 ) | |
268 if (!cplot_x) { | |
269 label_features( | |
270 x_arg = my_x [my_x < 0] | |
271 , y_arg = my_y [my_x < 0] + y_text_offset | |
272 , labels_arg = labels[my_x < 0] | |
273 , slant_arg = my_slant | |
248 ) | 274 ) |
249 } | 275 } |
250 } | 276 } else { |
251 } | 277 if (!cplot_x) { |
252 } | 278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant |
253 if (!cplot_x) { | 279 my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset |
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | 280 } else { |
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
282 my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
283 } | |
284 label_features( | |
285 x_arg = my_x | |
286 , y_arg = my_y_arg | |
287 , labels_arg = labels | |
288 , slant_arg = my_slant | |
289 ) | |
290 } # end if (length(my_x) > 1) | |
291 } # end if ( x_show_labels != "0" ) | |
255 } else { | 292 } else { |
256 my_slant <- sapply( | 293 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
257 X = (my_y > 0) | 294 text(x=1, y=1, labels="no S-plot is possible") |
258 , FUN = function(x) if (x) 2 else -2 | 295 } # end if (sum(is.infinte(my_xlim))==0) |
259 ) * my_feature_label_slant | 296 } else { |
260 } | 297 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
261 if (length(my_x) > 1) { | 298 text(x=1, y=1, labels="no S-plot is possible") |
262 label_features( | 299 } # end if (length(plus_cor) != 0 && length(plus_cor) != 0) |
263 x_arg = my_x [my_x > 0] | 300 } # end action |
264 , y_arg = my_y [my_x > 0] - y_text_offset | 301 ) # end with( my_cor_vs_cov, action ) |
265 , labels_arg = labels[my_x > 0] | |
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
267 ) | |
268 if (!cplot_x) { | |
269 label_features( | |
270 x_arg = my_x [my_x < 0] | |
271 , y_arg = my_y [my_x < 0] + y_text_offset | |
272 , labels_arg = labels[my_x < 0] | |
273 , slant_arg = my_slant | |
274 ) | |
275 } | |
276 } else { | |
277 if (!cplot_x) { | |
278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant | |
279 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset | |
280 } else { | |
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
282 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
283 } | |
284 label_features( | |
285 x_arg = my_x | |
286 , y_arg = my_y_arg | |
287 , labels_arg = labels | |
288 , slant_arg = my_slant | |
289 ) | |
290 } | |
291 } | |
292 } | |
293 ) | |
294 return (my_cor_vs_cov) | 302 return (my_cor_vs_cov) |
295 } | 303 } # end function do_s_plot |
296 my_cor_vs_cov <- do_s_plot( | 304 my_cor_vs_cov <- do_s_plot( |
297 x_env = x_env | 305 x_env = x_env |
298 , predictor_projection_x = TRUE | 306 , predictor_projection_x = TRUE |
299 , cplot_x = FALSE | 307 , cplot_x = FALSE |
300 ) | 308 ) |
301 typeVc <- c("correlation", # 1 | 309 typevc <- c("correlation", # 1 |
302 "outlier", # 2 | 310 "outlier", # 2 |
303 "overview", # 3 | 311 "overview", # 3 |
304 "permutation", # 4 | 312 "permutation", # 4 |
305 "predict-train", # 5 | 313 "predict-train", # 5 |
306 "predict-test", # 6 | 314 "predict-test", # 6 |
310 "x-variance", # 10 | 318 "x-variance", # 10 |
311 "xy-score", # 11 | 319 "xy-score", # 11 |
312 "xy-weight" # 12 | 320 "xy-weight" # 12 |
313 ) # [c(3,8,9)] # [c(4,3,8,9)] | 321 ) # [c(3,8,9)] # [c(4,3,8,9)] |
314 if ( length(my_oplsda@orthoVipVn) > 0 ) { | 322 if ( length(my_oplsda@orthoVipVn) > 0 ) { |
315 my_typevc <- typeVc[c(9,3,8)] | 323 my_typevc <- typevc[c(9,3,8)] |
316 } else { | 324 } else { |
317 my_typevc <- c("(dummy)","overview","(dummy)") | 325 my_typevc <- c("(dummy)","overview","(dummy)") |
318 } | 326 } |
319 my_ortho_cor_vs_cov_exists <- FALSE | 327 my_ortho_cor_vs_cov_exists <- FALSE |
320 for (my_type in my_typevc) { | 328 for (my_type in my_typevc) { |
321 if (my_type %in% typeVc) { | 329 if (my_type %in% typevc) { |
322 tryCatch({ | 330 tryCatch({ |
323 if ( my_type != "x-loading" ) { | 331 if ( my_type != "x-loading" ) { |
324 plot( | 332 plot( |
325 x = my_oplsda | 333 x = my_oplsda |
326 , typeVc = my_type | 334 , typeVc = my_type |
327 , parCexN = 0.4 | 335 , parCexN = 0.4 |
328 , parDevNewL = FALSE | 336 , parDevNewL = FALSE |
329 , parLayL = TRUE | 337 , parLayL = TRUE |
330 , parEllipsesL = TRUE | 338 , parEllipsesL = TRUE |
331 ) | 339 ) |
332 if (my_type == "overview") { | 340 if (my_type == "overview") { |
333 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) | 341 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) |
334 title(sub = sub_label) | 342 title(sub = sub_label) |
335 } | 343 } |
336 } else { | 344 } else { |
337 my_ortho_cor_vs_cov <- do_s_plot( | 345 my_ortho_cor_vs_cov <- do_s_plot( |
338 x_env = x_env | 346 x_env = x_env |
339 , predictor_projection_x = FALSE | 347 , predictor_projection_x = FALSE |
340 , cplot_x = FALSE | 348 , cplot_x = FALSE |
341 ) | 349 ) |
342 my_ortho_cor_vs_cov_exists <- TRUE | 350 my_ortho_cor_vs_cov_exists <- TRUE |
351 } | |
343 } | 352 } |
344 }, error = function(e) { | 353 , error = function(e) { |
345 x_progress( | 354 x_progress( |
346 sprintf( | 355 sprintf( |
347 "factor level %s or %s may have only one sample - %s" | 356 "factor level %s or %s may have only one sample - %s" |
348 , fctr_lvl_1 | 357 , fctr_lvl_1 |
349 , fctr_lvl_2 | 358 , fctr_lvl_2 |
358 } | 367 } |
359 cplot_p <- x_env$cplot_p | 368 cplot_p <- x_env$cplot_p |
360 cplot_o <- x_env$cplot_o | 369 cplot_o <- x_env$cplot_o |
361 if (cplot_p || cplot_o) { | 370 if (cplot_p || cplot_o) { |
362 if (cplot_p) { | 371 if (cplot_p) { |
363 do_s_plot( | 372 if (!is.null(my_cor_vs_cov)) { |
364 x_env = x_env | 373 do_s_plot( |
365 , predictor_projection_x = TRUE | 374 x_env = x_env |
366 , cplot_x = TRUE | 375 , predictor_projection_x = TRUE |
367 , cor_vs_cov_x = my_cor_vs_cov | 376 , cplot_x = TRUE |
368 ) | 377 , cor_vs_cov_x = my_cor_vs_cov |
378 ) | |
379 } else { | |
380 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | |
381 text(x=1, y=1, labels="no predictor projection is possible") | |
382 } | |
369 did_plots <- 1 | 383 did_plots <- 1 |
370 } else { | 384 } else { |
371 did_plots <- 0 | 385 did_plots <- 0 |
372 } | 386 } |
373 if (cplot_o) { | 387 if (cplot_o) { |
416 return ( FALSE ) | 430 return ( FALSE ) |
417 } | 431 } |
418 | 432 |
419 # extract parameters from the environment | 433 # extract parameters from the environment |
420 vrbl_metadata <- calc_env$vrbl_metadata | 434 vrbl_metadata <- calc_env$vrbl_metadata |
421 vrbl_metadata_names <- vrbl_metadata[,1] | 435 vrbl_metadata_names <- vrbl_metadata[, 1] |
422 smpl_metadata <- calc_env$smpl_metadata | 436 smpl_metadata <- calc_env$smpl_metadata |
423 data_matrix <- calc_env$data_matrix | 437 data_matrix <- calc_env$data_matrix |
424 pairSigFeatOnly <- calc_env$pairSigFeatOnly | 438 pair_significant_features_only <- calc_env$pairSigFeatOnly |
425 facC <- calc_env$facC | 439 facC <- calc_env$facC |
426 tesC <- calc_env$tesC | 440 tesC <- calc_env$tesC |
427 # extract the levels from the environment | 441 # extract the levels from the environment |
428 originalLevCSV <- levCSV <- calc_env$levCSV | 442 originalLevCSV <- levCSV <- calc_env$levCSV |
429 # matchingC is one of { "none", "wildcard", "regex" } | 443 # matchingC is one of { "none", "wildcard", "regex" } |
430 matchingC <- calc_env$matchingC | 444 matchingC <- calc_env$matchingC |
431 labelFeatures <- calc_env$labelFeatures | 445 labelFeatures <- calc_env$labelFeatures |
446 minCrossvalI <- as.integer(calc_env$min_crossval_i) | |
432 | 447 |
433 # arg/env checking | 448 # arg/env checking |
434 if (!(facC %in% names(smpl_metadata))) { | 449 if (!(facC %in% names(smpl_metadata))) { |
435 failure_action( | 450 failure_action( |
436 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" | 451 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" |
444 | 459 |
445 rt <- vrbl_metadata$rt | 460 rt <- vrbl_metadata$rt |
446 names(rt) <- vrbl_metadata$variableMetadata | 461 names(rt) <- vrbl_metadata$variableMetadata |
447 rt_lookup <- function(feature) unname(rt[feature]) | 462 rt_lookup <- function(feature) unname(rt[feature]) |
448 | 463 |
449 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) | 464 # calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv) |
450 salience_df <- calc_env$salience_df <- w4msalience( | 465 salience_df <- calc_env$salience_df <- w4msalience( |
451 data_matrix = data_matrix | 466 data_matrix = data_matrix |
452 , sample_class = smpl_metadata[,facC] | 467 , sample_class = smpl_metadata[,facC] |
453 , failure_action = failure_action | 468 , failure_action = failure_action |
454 ) | 469 ) |
455 salience_tsv_action({ | 470 salience_tsv_action({ |
456 my_df <- data.frame( | 471 with ( |
457 featureID = salience_df$feature | 472 salience_df |
458 , salientLevel = salience_df$max_level | 473 , { |
459 , salientRCV = salience_df$salient_rcv | 474 my_df <<- data.frame( |
460 , salience = salience_df$salience | 475 featureID = feature |
461 , mz = mz_lookup(salience_df$feature) | 476 , salientLevel = max_level |
462 , rt = rt_lookup(salience_df$feature) | 477 , salienceRCV = salience_rcv |
463 ) | 478 , relativeSalientDistance = relative_salient_distance |
464 my_df[order(-my_df$salience),] | 479 , salience = salience |
480 , mz = mz_lookup(feature) | |
481 , rt = rt_lookup(feature) | |
482 ) | |
483 }) | |
484 my_df[order(-my_df$relativeSalientDistance),] | |
465 }) | 485 }) |
466 | 486 |
467 # transform wildcards to regexen | 487 # transform wildcards to regexen |
468 if (matchingC == "wildcard") { | 488 if (matchingC == "wildcard") { |
469 # strsplit(x = "hello,wild,world", split = ",") | 489 # strsplit(x = "hello,wild,world", split = ",") |
571 ) | 591 ) |
572 my_cor_cov <- do_detail_plot( | 592 my_cor_cov <- do_detail_plot( |
573 x_dataMatrix = my_matrix | 593 x_dataMatrix = my_matrix |
574 , x_predictor = predictor | 594 , x_predictor = predictor |
575 , x_is_match = TRUE | 595 , x_is_match = TRUE |
576 , x_algorithm = algoC | 596 , x_algorithm = "nipals" |
577 , x_prefix = if (pairSigFeatOnly) { | 597 , x_prefix = if (pair_significant_features_only) { |
578 "Significantly contrasting features" | 598 "Significantly contrasting features" |
579 } else { | 599 } else { |
580 "Significant features" | 600 "Significant features" |
581 } | 601 } |
582 , x_show_labels = labelFeatures | 602 , x_show_labels = labelFeatures |
583 , x_progress = progress_action | 603 , x_progress = progress_action |
584 , x_crossval_i = min(7, length(chosen_samples)) | 604 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) |
585 , x_env = calc_env | 605 , x_env = calc_env |
586 ) | 606 ) |
587 if ( is.null(my_cor_cov) ) { | 607 if ( is.null(my_cor_cov) ) { |
588 progress_action("NOTHING TO PLOT") | 608 progress_action("NOTHING TO PLOT") |
589 } else { | 609 } else { |
638 } else { | 658 } else { |
639 1 | 659 1 |
640 } | 660 } |
641 ) | 661 ) |
642 col_selector <- vrbl_metadata_names[ | 662 col_selector <- vrbl_metadata_names[ |
643 if ( pairSigFeatOnly ) fully_significant else overall_significant | 663 if (pair_significant_features_only) fully_significant else overall_significant |
644 ] | 664 ] |
645 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] | 665 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] |
646 my_cor_cov <- do_detail_plot( | 666 my_cor_cov <- do_detail_plot( |
647 x_dataMatrix = my_matrix | 667 x_dataMatrix = my_matrix |
648 , x_predictor = predictor | 668 , x_predictor = predictor |
649 , x_is_match = is_match | 669 , x_is_match = is_match |
650 , x_algorithm = algoC | 670 , x_algorithm = "nipals" |
651 , x_prefix = if (pairSigFeatOnly) { | 671 , x_prefix = if (pair_significant_features_only) { |
652 "Significantly contrasting features" | 672 "Significantly contrasting features" |
653 } else { | 673 } else { |
654 "Significant features" | 674 "Significant features" |
655 } | 675 } |
656 , x_show_labels = labelFeatures | 676 , x_show_labels = labelFeatures |
657 , x_progress = progress_action | 677 , x_progress = progress_action |
658 , x_crossval_i = min(7, length(chosen_samples)) | 678 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) |
659 , x_env = calc_env | 679 , x_env = calc_env |
660 ) | 680 ) |
661 if ( is.null(my_cor_cov) ) { | 681 if ( is.null(my_cor_cov) ) { |
662 progress_action("NOTHING TO PLOT.") | 682 progress_action("NOTHING TO PLOT.") |
663 } else { | 683 } else { |
679 ) | 699 ) |
680 } | 700 } |
681 } | 701 } |
682 } | 702 } |
683 } | 703 } |
684 } else { # tesC == "none" | 704 } else { |
705 # tesC == "none" | |
685 # find all the levels for factor facC in sampleMetadata | 706 # find all the levels for factor facC in sampleMetadata |
686 level_union <- unique(sort(smpl_metadata_facC)) | 707 level_union <- unique(sort(smpl_metadata_facC)) |
687 # identify the selected levels for factor facC from sampleMetadata | 708 # identify the selected levels for factor facC from sampleMetadata |
688 level_include <- sapply(X = level_union, FUN = isLevelSelected) | 709 level_include <- sapply(X = level_union, FUN = isLevelSelected) |
689 # discard the non-selected levels for factor facC | 710 # discard the non-selected levels for factor facC |
697 , FUN = function(x) { | 718 , FUN = function(x) { |
698 fctr_lvl_1 <- x[1] | 719 fctr_lvl_1 <- x[1] |
699 fctr_lvl_2 <- { | 720 fctr_lvl_2 <- { |
700 if ( fctr_lvl_1 %in% completed ) | 721 if ( fctr_lvl_1 %in% completed ) |
701 return("DUMMY") | 722 return("DUMMY") |
702 # strF(completed) | |
703 completed <<- c(completed, fctr_lvl_1) | 723 completed <<- c(completed, fctr_lvl_1) |
704 setdiff(level_union, fctr_lvl_1) | 724 setdiff(level_union, fctr_lvl_1) |
705 } | 725 } |
706 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 726 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
707 fctr_lvl_2 <- "other" | 727 fctr_lvl_2 <- "other" |
728 ) | 748 ) |
729 my_cor_cov <- do_detail_plot( | 749 my_cor_cov <- do_detail_plot( |
730 x_dataMatrix = my_matrix | 750 x_dataMatrix = my_matrix |
731 , x_predictor = predictor | 751 , x_predictor = predictor |
732 , x_is_match = is_match | 752 , x_is_match = is_match |
733 , x_algorithm = algoC | 753 , x_algorithm = "nipals" |
734 , x_prefix = "Features" | 754 , x_prefix = "Features" |
735 , x_show_labels = labelFeatures | 755 , x_show_labels = labelFeatures |
736 , x_progress = progress_action | 756 , x_progress = progress_action |
737 , x_crossval_i = min(7, length(chosen_samples)) | 757 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) |
738 , x_env = calc_env | 758 , x_env = calc_env |
739 ) | 759 ) |
740 if ( is.null(my_cor_cov) ) { | 760 if ( is.null(my_cor_cov) ) { |
741 progress_action("NOTHING TO PLOT...") | 761 progress_action("NOTHING TO PLOT...") |
742 } else { | 762 } else { |
781 ) | 801 ) |
782 my_cor_cov <- do_detail_plot( | 802 my_cor_cov <- do_detail_plot( |
783 x_dataMatrix = my_matrix | 803 x_dataMatrix = my_matrix |
784 , x_predictor = predictor | 804 , x_predictor = predictor |
785 , x_is_match = is_match | 805 , x_is_match = is_match |
786 , x_algorithm = algoC | 806 , x_algorithm = "nipals" |
787 , x_prefix = "Features" | 807 , x_prefix = "Features" |
788 , x_show_labels = labelFeatures | 808 , x_show_labels = labelFeatures |
789 , x_progress = progress_action | 809 , x_progress = progress_action |
790 , x_crossval_i = min(7, length(chosen_samples)) | 810 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) |
791 , x_env = calc_env | 811 , x_env = calc_env |
792 ) | 812 ) |
793 if ( is.null(my_cor_cov) ) { | 813 if ( is.null(my_cor_cov) ) { |
794 progress_action("NOTHING TO PLOT.....") | 814 progress_action("NOTHING TO PLOT.....") |
795 } else { | 815 } else { |
815 } | 835 } |
816 } | 836 } |
817 if (!did_plot) { | 837 if (!did_plot) { |
818 failure_action( | 838 failure_action( |
819 sprintf( | 839 sprintf( |
820 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" | 840 "Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?" |
821 , facC, originalLevCSV)) | 841 , facC, originalLevCSV)) |
822 return ( FALSE ) | 842 return ( FALSE ) |
823 } | 843 } |
824 return ( TRUE ) | 844 return ( TRUE ) |
825 } | 845 } |
832 cor_vs_cov <- function( | 852 cor_vs_cov <- function( |
833 matrix_x | 853 matrix_x |
834 , ropls_x | 854 , ropls_x |
835 , predictor_projection_x = TRUE | 855 , predictor_projection_x = TRUE |
836 , x_progress = print | 856 , x_progress = print |
857 , x_env | |
837 ) { | 858 ) { |
838 tryCatch({ | 859 tryCatch({ |
839 return( | 860 return( |
840 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) | 861 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env) |
841 ) | 862 ) |
842 }, error = function(e) { | 863 } |
864 , error = function(e) { | |
843 x_progress( | 865 x_progress( |
844 sprintf( | 866 sprintf( |
845 "cor_vs_cov fatal error - %s" | 867 "cor_vs_cov fatal error - %s" |
846 , as.character(e) # e$message | 868 , as.character(e) # e$message |
847 ) | 869 ) |
849 return ( NULL ) | 871 return ( NULL ) |
850 }) | 872 }) |
851 } | 873 } |
852 | 874 |
853 cor_vs_cov_try <- function( | 875 cor_vs_cov_try <- function( |
854 matrix_x | 876 matrix_x # rows are samples; columns, features |
855 , ropls_x | 877 , ropls_x # an instance of ropls::opls |
856 , predictor_projection_x = TRUE | 878 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection |
857 , x_progress = print | 879 , x_progress = print # function to produce progress and error messages |
880 , x_env | |
858 ) { | 881 ) { |
882 my_matrix_x <- matrix_x | |
883 my_matrix_x[my_matrix_x==0] <- NA | |
884 fdr_features <- x_env$fdr_features | |
885 | |
859 x_class <- class(ropls_x) | 886 x_class <- class(ropls_x) |
860 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) | 887 if ( !( as.character(x_class) == "opls" ) ) { |
861 stop( | 888 stop( |
862 paste( | 889 paste( |
863 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " | 890 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " |
864 , as.character(x_class) | 891 , as.character(x_class) |
865 ) | 892 ) |
866 ) | 893 ) |
867 } | 894 } |
895 if ( !ropls_x@suppLs$algoC == "nipals" ) { | |
896 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS | |
897 stop( | |
898 paste( | |
899 "cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than " | |
900 , ropls_x@suppLs$algoC | |
901 ) | |
902 ) | |
903 } | |
868 result <- list() | 904 result <- list() |
869 result$projection <- projection <- if (predictor_projection_x) 1 else 2 | 905 result$projection <- projection <- if (predictor_projection_x) 1 else 2 |
870 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS | 906 |
871 if ( ropls_x@suppLs$algoC == "nipals") { | 907 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 |
872 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 | 908 # (and not from the supplement despite the statement that, for the NIPALS algorithm, |
873 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) | 909 # the equations from the supplement should be used) because of the definition of the |
874 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) | 910 # Pearson/Galton coefficient of correlation is defined as |
875 if (predictor_projection_x) | 911 # $$ |
876 score_matrix <- ropls_x@scoreMN | 912 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} |
877 else | 913 # $$ |
878 score_matrix <- ropls_x@orthoScoreMN | 914 # as described (among other places) on Wikipedia at |
879 score_matrix_transposed <- t(score_matrix) | 915 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population |
880 score_matrix_magnitude <- mag(score_matrix) | 916 # The equations in the supplement said to use, for the predictive component t1, |
881 result$covariance <- | 917 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} |
882 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) | 918 # but the results that I got were dramatically different from published results for S-PLOTs; |
883 result$correlation <- | 919 # perhaps my data are not centered exactly the same way that theirs were. |
884 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) | 920 # The correlations calculated here are in agreement with those calculated with the code from |
921 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf | |
922 | |
923 | |
924 # count the features/variables (one column for each sample) | |
925 # count the features/variables (one column for each sample) | |
926 n_features <- ncol(my_matrix_x) | |
927 all_n_features <- x_env$fdr_features | |
928 if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) { | |
929 all_n_features <- as.integer(all_n_features) | |
885 } else { | 930 } else { |
886 # WARNING - untested code - I don't have test data to exercise this branch | 931 all_n_features <- n_features |
887 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 | 932 } |
888 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F | 933 fdr_n_features <- max(n_features, all_n_features) |
889 if (predictor_projection_x) | 934 # print("n_features") |
890 score_matrix <- ropls_x@scoreMN | 935 # print(n_features) |
891 else | 936 |
892 score_matrix <- ropls_x@orthoScoreMN | 937 # count the samples/observations (one row for each sample) |
893 score_matrix_transposed <- t(score_matrix) | 938 n_observations <- nrow(my_matrix_x) |
894 cov_divisor <- nrow(matrix_x) - 1 | 939 |
895 result$covariance <- sapply( | 940 # choose whether to plot the predictive score vector or orthogonal score vector |
896 X = 1:ncol(matrix_x) | 941 if (predictor_projection_x) |
897 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor | 942 score_vector <- ropls_x@scoreMN |
943 else | |
944 score_vector <- ropls_x@orthoScoreMN | |
945 | |
946 # compute the covariance of each feature with the score vector | |
947 result$covariance <- | |
948 sapply( | |
949 X = 1:n_features | |
950 , FUN = function(i) { | |
951 cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") | |
952 } | |
898 ) | 953 ) |
899 score_sd <- sapply( | 954 # access covariance by feature name |
900 X = 1:ncol(score_matrix) | 955 names(result$covariance) <- colnames(my_matrix_x) |
901 , FUN = function(x) sd(score_matrix[,x]) | 956 |
957 # compute the correlation of each feature with the score vector | |
958 result$correlation <- | |
959 sapply( | |
960 X = 1:n_features | |
961 , FUN = function(i) { | |
962 cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") | |
963 } | |
902 ) | 964 ) |
903 # xSdVn - Numerical vector: variable standard deviations of the 'x' matrix | 965 # access correlation by feature name |
904 xSdVn <- ropls_x@xSdVn | 966 names(result$correlation) <- colnames(my_matrix_x) |
905 result$correlation <- sapply( | 967 |
906 X = 1:ncol(matrix_x) | 968 # eliminate NAs in either correlation or covariance |
907 , FUN = function(x) { | 969 nas <- is.na(result$correlation) | is.na(result$covariance) |
908 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) | 970 featureID <- names(ropls_x@vipVn) |
909 } | 971 featureID <- featureID[!nas] |
910 ) | 972 result$correlation <- result$correlation[!nas] |
911 } | 973 result$covariance <- result$covariance[!nas] |
912 result$correlation <- result$correlation[ 1, , drop = TRUE ] | 974 n_features <- length(featureID) |
913 result$covariance <- result$covariance [ 1, , drop = TRUE ] | 975 |
914 | 976 correl_pci <- lapply( |
915 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 | 977 X = 1:n_features |
978 , FUN = function(i) { | |
979 correl.ci( | |
980 r = result$correlation[i] | |
981 , n_obs = n_observations | |
982 , n_vars = fdr_n_features | |
983 ) | |
984 } | |
985 ) | |
986 result$p_value_raw <- sapply( | |
987 X = 1:n_features | |
988 , FUN = function(i) correl_pci[[i]]$p.value.raw | |
989 ) | |
990 result$p_value_raw[is.na(result$p_value_raw)] <- 1.0 | |
991 result$p_value <- sapply( | |
992 X = 1:n_features | |
993 , FUN = function(i) correl_pci[[i]]$p.value | |
994 ) | |
995 result$p_value[is.na(result$p_value)] <- 1.0 | |
996 result$ci_lower <- sapply( | |
997 X = 1:n_features | |
998 , FUN = function(i) correl_pci[[i]]$CI["lower"] | |
999 ) | |
1000 result$ci_upper <- sapply( | |
1001 X = 1:n_features | |
1002 , FUN = function(i) correl_pci[[i]]$CI["upper"] | |
1003 ) | |
1004 | |
1005 | |
1006 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) | |
916 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) | 1007 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) |
917 result$vip4p <- as.numeric(ropls_x@vipVn) | 1008 result$vip4p <- as.numeric(ropls_x@vipVn)[!nas] |
918 result$vip4o <- as.numeric(ropls_x@orthoVipVn) | 1009 result$vip4o <- as.numeric(ropls_x@orthoVipVn)[!nas] |
919 result$loadp <- as.numeric(ropls_x@loadingMN) | 1010 if (length(result$vip4o) == 0) result$vip4o <- NA |
920 result$loado <- as.numeric(ropls_x@orthoLoadingMN) | 1011 # extract the loadings |
1012 result$loadp <- as.numeric(ropls_x@loadingMN)[!nas] | |
1013 result$loado <- as.numeric(ropls_x@orthoLoadingMN)[!nas] | |
921 # get the level names | 1014 # get the level names |
922 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) | 1015 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) |
923 fctr_lvl_1 <- level_names[1] | 1016 fctr_lvl_1 <- level_names[1] |
924 fctr_lvl_2 <- level_names[2] | 1017 fctr_lvl_2 <- level_names[2] |
925 feature_count <- length(ropls_x@vipVn) | 1018 result$level1 <- rep.int(x = fctr_lvl_1, times = n_features) |
926 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) | 1019 result$level2 <- rep.int(x = fctr_lvl_2, times = n_features) |
927 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) | |
928 superresult <- list() | |
929 if (length(result$vip4o) == 0) result$vip4o <- NA | |
930 greaterLevel <- sapply( | 1020 greaterLevel <- sapply( |
931 X = result$correlation | 1021 X = result$correlation |
932 , FUN = function(my_corr) | 1022 , FUN = function(my_corr) { |
933 tryCatch({ | 1023 tryCatch({ |
934 if ( is.nan( my_corr ) ) { | 1024 if ( is.na(my_corr) || ! is.numeric( my_corr ) ) { |
935 print("my_corr is NaN") | 1025 NA |
936 NA | |
937 } else { | 1026 } else { |
938 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | 1027 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 |
939 } | 1028 } |
940 }, error = function(e) { | 1029 } |
1030 , error = function(e) { | |
1031 print(my_corr) | |
941 x_progress( | 1032 x_progress( |
942 sprintf( | 1033 sprintf( |
943 "cor_vs_cov -> sapply: error - substituting NA - %s" | 1034 "cor_vs_cov -> sapply: error - substituting NA - %s" |
944 , as.character(e) | 1035 , as.character(e) |
945 ) | 1036 ) |
946 ) | 1037 ) |
947 NA | 1038 NA |
948 }) | 1039 } |
1040 ) | |
1041 } | |
949 ) | 1042 ) |
950 | 1043 |
951 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | 1044 # build a data frame to hold the content for the tab-separated values file |
952 featureID <- names(ropls_x@vipVn) | |
953 greaterLevel <- greaterLevel[featureID] | |
954 result$correlation <- result$correlation[featureID] | |
955 result$covariance <- result$covariance[featureID] | |
956 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | |
957 | |
958 tsv1 <- data.frame( | 1045 tsv1 <- data.frame( |
959 featureID = featureID | 1046 featureID = featureID |
960 , factorLevel1 = result$level1 | 1047 , factorLevel1 = result$level1 |
961 , factorLevel2 = result$level2 | 1048 , factorLevel2 = result$level2 |
962 , greaterLevel = greaterLevel | 1049 , greaterLevel = greaterLevel |
963 , projection = result$projection | 1050 , projection = result$projection |
964 , correlation = result$correlation | 1051 , correlation = result$correlation |
965 , covariance = result$covariance | 1052 , covariance = result$covariance |
966 , vip4p = result$vip4p | 1053 , vip4p = result$vip4p |
967 , vip4o = result$vip4o | 1054 , vip4o = result$vip4o |
968 , loadp = result$loadp | 1055 , loadp = result$loadp |
969 , loado = result$loado | 1056 , loado = result$loado |
970 , row.names = NULL | 1057 , cor_p_val_raw = result$p_value_raw |
1058 , cor_p_value = result$p_value | |
1059 , cor_ci_lower = result$ci_lower | |
1060 , cor_ci_upper = result$ci_upper | |
971 ) | 1061 ) |
972 tsv1 <- tsv1[!is.na(tsv1$correlation),] | 1062 rownames(tsv1) <- tsv1$featureID |
973 tsv1 <- tsv1[!is.na(tsv1$covariance),] | 1063 |
974 superresult$tsv1 <- tsv1 | 1064 # build the superresult, i.e., the result returned by this function |
975 rownames(superresult$tsv1) <- tsv1$featureID | 1065 superresult <- list() |
976 superresult$projection <- result$projection | 1066 superresult$projection <- result$projection |
977 superresult$covariance <- result$covariance | 1067 superresult$covariance <- result$covariance |
978 superresult$correlation <- result$correlation | 1068 superresult$correlation <- result$correlation |
979 superresult$vip4p <- result$vip4p | 1069 superresult$vip4p <- result$vip4p |
980 superresult$vip4o <- result$vip4o | 1070 superresult$vip4o <- result$vip4o |
981 superresult$loadp <- result$loadp | 1071 superresult$loadp <- result$loadp |
982 superresult$loado <- result$loado | 1072 superresult$loado <- result$loado |
1073 superresult$cor_p_value <- tsv1$cor_p_value | |
983 superresult$details <- result | 1074 superresult$details <- result |
984 result$superresult <- superresult | 1075 |
985 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways | 1076 # remove any rows having NA for covariance or correlation |
986 result$oplsda <- ropls_x | 1077 tsv1 <- tsv1[!is.na(tsv1$correlation),] |
987 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways | 1078 tsv1 <- tsv1[!is.na(tsv1$covariance),] |
1079 superresult$tsv1 <- tsv1 | |
1080 | |
1081 # # I did not include these but left them commentd out in case future | |
1082 # # consumers of this routine want to use it in currently unanticipated ways | |
1083 # result$superresult <- superresult | |
1084 # result$oplsda <- ropls_x | |
1085 # result$predictor <- ropls_x@suppLs$y | |
1086 | |
988 return (superresult) | 1087 return (superresult) |
989 } | 1088 } |
990 | 1089 |
991 # vim: sw=2 ts=2 et : | 1090 # Code for correl.ci was adapted from correl function from: |
1091 # @book{ | |
1092 # Tsagris_2018, | |
1093 # author = {Tsagris, Michail}, | |
1094 # year = {2018}, | |
1095 # link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R}, | |
1096 # title = {Multivariate data analysis in R} | |
1097 # } | |
1098 # which follows | |
1099 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition | |
1100 | |
1101 correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) { | |
1102 ## r is the calculated correlation coefficient for n_obs pairs of observations of one variable | |
1103 ## a is the significance level | |
1104 ## rho is the hypothesised correlation | |
1105 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho | |
1106 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 | |
1107 se <- (1 - r^2)/sqrt(n_obs - 3) ## standard error for Fisher's z-transformation of Ho | |
1108 test <- (zh1 - zh0)/se ### test statistic | |
1109 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value | |
1110 pvalue.adj <- p.adjust(p = pvalue, method = "BY", n = n_vars) | |
1111 z_L <- zh1 - qnorm(1 - a/2)*se | |
1112 z_h <- zh1 + qnorm(1 - a/2)*se | |
1113 fish_l <- tanh(z_L) # (exp(2*z_l)-1)/(exp(2*z_l)+1), i.e., lower confidence limit | |
1114 fish_h <- tanh(z_h) # (exp(2*z_h)-1)/(exp(2*z_h)+1), i.e., upper confidence limit | |
1115 ci <- c(fish_l, fish_h) | |
1116 names(ci) <- c("lower", "upper") | |
1117 list( | |
1118 correlation = r | |
1119 , p.value.raw = pvalue | |
1120 , p.value = pvalue.adj | |
1121 , CI = ci | |
1122 ) | |
1123 } | |
1124 | |
1125 # vim: sw=2 ts=2 et ai : |