comparison w4mcorcov_calc.R @ 7:ca9938f2eb6a draft default tip

"planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 5fd9687d543a48a715b1180caf93abebebd58b0e"
author eschen42
date Tue, 17 Nov 2020 23:29:59 +0000
parents 0b49916c5c52
children
comparison
equal deleted inserted replaced
6:0b49916c5c52 7:ca9938f2eb6a
1 # center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/ 1 # compute and output detail plots
2 center_colmeans <- function(x) {
3 xcenter = colMeans(x)
4 x - rep(xcenter, rep.int(nrow(x), ncol(x)))
5 }
6
7 #### OPLS-DA
8 algoC <- "nipals"
9
10 do_detail_plot <- function( 2 do_detail_plot <- function(
11 x_dataMatrix 3 x_dataMatrix
12 , x_predictor 4 , x_predictor
13 , x_is_match 5 , x_is_match
14 , x_algorithm 6 , x_algorithm
19 , x_crossval_i 11 , x_crossval_i
20 ) { 12 ) {
21 off <- function(x) if (x_show_labels == "0") 0 else x 13 off <- function(x) if (x_show_labels == "0") 0 else x
22 if ( x_is_match 14 if ( x_is_match
23 && ncol(x_dataMatrix) > 0 15 && ncol(x_dataMatrix) > 0
24 && length(unique(x_predictor))> 1 16 && length(unique(x_predictor)) > 1
25 && x_crossval_i < nrow(x_dataMatrix) 17 && x_crossval_i < nrow(x_dataMatrix)
26 ) { 18 ) {
27 my_oplsda <- opls( 19 my_oplsda <- opls(
28 x = x_dataMatrix 20 x = x_dataMatrix
29 , y = x_predictor 21 , y = x_predictor
30 , algoC = x_algorithm 22 , algoC = x_algorithm
31 , predI = 1 23 , predI = 1
32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 24 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
33 , printL = FALSE 25 , fig.pdfC = 'none'
34 , plotL = FALSE 26 , info.txtC = 'none'
35 , crossvalI = x_crossval_i 27 , crossvalI = x_crossval_i
36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. 28 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
37 ) 29 )
38 # strip out variables having negligible variance 30 # strip out variables having negligible variance
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] 31 x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE]
40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) 32 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
41 # x_progress(strF(x_dataMatrix)) 33
42 # x_progress(strF(my_oplsda))
43 #x_progress(head(my_oplsda_suppLs_y_levels))
44 #x_progress(unique(my_oplsda_suppLs_y_levels))
45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] 34 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] 35 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
47 do_s_plot <- function( 36 do_s_plot <- function(
48 x_env 37 x_env
49 , predictor_projection_x = TRUE 38 , predictor_projection_x = TRUE
50 , cplot_x = FALSE 39 , cplot_x = FALSE
51 , cor_vs_cov_x = NULL 40 , cor_vs_cov_x = NULL
52 ) 41 ) {
53 {
54 #print(ls(x_env)) # "cplot_y" etc
55 #print(str(x_env$cplot_y)) # chr "covariance"
56 if (cplot_x) { 42 if (cplot_x) {
57 #print(x_env$cplot_y) # "covariance"
58 cplot_y_correlation <- (x_env$cplot_y == "correlation") 43 cplot_y_correlation <- (x_env$cplot_y == "correlation")
59 #print(cplot_y_correlation) # FALSE 44 default_lim_x <- 10
45 } else {
46 default_lim_x <- 1.2
60 } 47 }
61 if (is.null(cor_vs_cov_x)) { 48 if (is.null(cor_vs_cov_x)) {
62 my_cor_vs_cov <- cor_vs_cov( 49 my_cor_vs_cov <- cor_vs_cov(
63 matrix_x = x_dataMatrix 50 matrix_x = x_dataMatrix
64 , ropls_x = my_oplsda 51 , ropls_x = my_oplsda
65 , predictor_projection_x = predictor_projection_x 52 , predictor_projection_x = predictor_projection_x
66 , x_progress 53 , x_progress
54 , x_env
67 ) 55 )
68 } else { 56 } else {
69 my_cor_vs_cov <- cor_vs_cov_x 57 my_cor_vs_cov <- cor_vs_cov_x
70 } 58 }
71 # print("str(my_cor_vs_cov)") 59
72 # str(my_cor_vs_cov)
73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { 60 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
74 if (is.null(cor_vs_cov_x)) { 61 if (is.null(cor_vs_cov_x)) {
75 x_progress("No cor_vs_cov data produced") 62 x_progress(
63 sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2)
64 )
76 } 65 }
77 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") 66 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
78 text(x=1, y=1, labels="too few covariance data") 67 text(x=1, y=1, labels="too few covariance data")
79 return(my_cor_vs_cov) 68 return(my_cor_vs_cov)
80 } 69 }
82 my_cor_vs_cov 71 my_cor_vs_cov
83 , { 72 , {
84 min_x <- min(covariance, na.rm = TRUE) 73 min_x <- min(covariance, na.rm = TRUE)
85 max_x <- max(covariance, na.rm = TRUE) 74 max_x <- max(covariance, na.rm = TRUE)
86 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) 75 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
87 covariance <- covariance / lim_x 76
88 lim_x <- 1.2 77 # Regarding using VIP as a guide to selecting a biomarker:
89 # "It is generally accepted that a variable should be selected if vj>1, [27–29], 78 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
90 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." 79 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
91 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) 80 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
92 plus_cor <- correlation 81 plus_cor <- correlation
93 plus_cov <- covariance 82 plus_cov <- covariance
94 cex <- 0.65 83 if (length(plus_cor) != 0 && length(plus_cor) != 0) {
95 which_projection <- if (projection == 1) "t1" else "o1" 84 cex <- 0.65
96 which_loading <- if (projection == 1) "parallel" else "orthogonal" 85 if (projection == 1) {
97 if (projection == 1) { # predictor-projection 86 # predictor-projection
98 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) 87 vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83)))
99 if (!cplot_x) { # S-plot predictor-projection 88 if (!cplot_x) {
100 my_xlab <- "relative covariance(feature,t1)" 89 # S-plot predictor-projection
101 my_x <- plus_cov 90 my_xlab <- "covariance(feature,t1)"
102 my_ylab <- "correlation(feature,t1)" 91 my_x <- plus_cov
103 my_y <- plus_cor
104 } else { # C-plot predictor-projection
105 my_xlab <- "variable importance in predictor-projection"
106 my_x <- vip4p
107 if (cplot_y_correlation) {
108 my_ylab <- "correlation(feature,t1)" 92 my_ylab <- "correlation(feature,t1)"
109 my_y <- plus_cor 93 my_y <- plus_cor
94 # X,Y limits for S-PLOT
95 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3))
96 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
110 } else { 97 } else {
111 my_ylab <- "relative covariance(feature,t1)" 98 # C-plot predictor-projection
112 my_y <- plus_cov 99 my_xlab <- "variable importance in predictor-projection"
100 my_x <- vip4p
101 if (cplot_y_correlation) {
102 my_ylab <- "correlation(feature,t1)"
103 my_y <- plus_cor
104 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
105 } else {
106 my_ylab <- "covariance(feature,t1)"
107 my_y <- plus_cov
108 my_ylim <- max(abs(plus_cov))
109 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) )
110 }
111 # X,Y limits for C-plot
112 lim_x <- max(my_x, na.rm = TRUE) * 1.1
113 lim_x <- min(lim_x, default_lim_x)
114 my_xlim <- c( 0, lim_x ) # + off(0.2) )
113 } 115 }
114 } 116 my_load_distal <- loadp
115 if (cplot_x) { 117 my_load_proximal <- loado
116 lim_x <- max(my_x, na.rm = TRUE) * 1.1 118 red <- as.numeric(correlation > 0) * vipcp
117 my_xlim <- c( 0, lim_x + off(0.2) ) 119 blue <- as.numeric(correlation < 0) * vipcp
120 alpha <- 0.1 + 0.4 * vipcp
121 red[is.na(red)] <- 0
122 blue[is.na(blue)] <- 0
123 alpha[is.na(alpha)] <- 0
124 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
125 main_label <- sprintf("%s for level %s versus %s"
126 , x_prefix, fctr_lvl_1, fctr_lvl_2)
118 } else { 127 } else {
119 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) 128 # orthogonal projection
120 } 129 vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83)))
121 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) 130 if (!cplot_x) {
122 my_load_distal <- loadp 131 # S-PLOT orthogonal-projection
123 my_load_proximal <- loado 132 my_xlab <- "covariance(feature,to1)"
124 red <- as.numeric(correlation > 0) * vipcp 133 my_x <- -plus_cov
125 blue <- as.numeric(correlation < 0) * vipcp 134 # X,Y limits for S-PLOT
126 alpha <- 0.1 + 0.4 * vipcp 135 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3))
127 red[is.na(red)] <- 0
128 blue[is.na(blue)] <- 0
129 alpha[is.na(alpha)] <- 0
130 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
131 main_label <- sprintf("%s for level %s versus %s"
132 , x_prefix, fctr_lvl_1, fctr_lvl_2)
133 } else { # orthogonal projection
134 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
135 if (!cplot_x) {
136 my_xlab <- "relative covariance(feature,to1)"
137 my_x <- -plus_cov
138 } else {
139 my_xlab <- "variable importance in orthogonal projection"
140 my_x <- vip4o
141 }
142 if (!cplot_x) { # S-plot orthogonal projection
143 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
144 my_ylab <- "correlation(feature,to1)"
145 my_y <- plus_cor
146 } else { # C-plot orthogonal projection
147 lim_x <- max(my_x, na.rm = TRUE) * 1.1
148 my_xlim <- c( 0, lim_x + off(0.2) )
149 if (cplot_y_correlation) {
150 my_ylab <- "correlation(feature,to1)" 136 my_ylab <- "correlation(feature,to1)"
151 my_y <- plus_cor 137 my_y <- plus_cor
138 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
152 } else { 139 } else {
153 my_ylab <- "relative covariance(feature,to1)" 140 # C-plot orthogonal-projection
154 my_y <- plus_cov 141 my_xlab <- "variable importance in orthogonal projection"
142 my_x <- vip4o
143 # C-plot orthogonal projection
144 # X,Y limits for C-plot
145 lim_x <- max(my_x, na.rm = TRUE) * 1.1
146 my_xlim <- c( 0, lim_x ) # + off(0.2) )
147 if (cplot_y_correlation) {
148 my_ylab <- "correlation(feature,to1)"
149 my_y <- plus_cor
150 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
151 } else {
152 my_ylab <- "covariance(feature,to1)"
153 my_y <- plus_cov
154 my_ylim <- max(abs(plus_cov))
155 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) )
156 }
155 } 157 }
158 my_load_distal <- loado
159 my_load_proximal <- loadp
160 alpha <- 0.1 + 0.4 * vipco
161 alpha[is.na(alpha)] <- 0
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
163 main_label <- sprintf(
164 "Features influencing orthogonal projection for %s versus %s"
165 , fctr_lvl_1, fctr_lvl_2)
156 } 166 }
157 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) 167 main_cex <- min(1.0, 46.0/nchar(main_label))
158 my_load_distal <- loado 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
159 my_load_proximal <- loadp 169 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18)
160 alpha <- 0.1 + 0.4 * vipco 170 if ( sum(is.infinite(my_xlim)) == 0 ) {
161 alpha[is.na(alpha)] <- 0 171 plot(
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) 172 y = my_y
163 main_label <- sprintf( 173 , x = my_x
164 "Features influencing orthogonal projection for %s versus %s" 174 , type = "p"
165 , fctr_lvl_1, fctr_lvl_2) 175 , xlim = my_xlim
166 } 176 , ylim = my_ylim
167 main_cex <- min(1.0, 46.0/nchar(main_label)) 177 , xlab = my_xlab
168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward 178 , ylab = my_ylab
169 plot( 179 , main = main_label
170 y = my_y 180 , cex.main = main_cex
171 , x = my_x 181 , cex = cex
172 , type = "p" 182 , pch = my_pch
173 , xlim = my_xlim 183 , col = my_col
174 , ylim = my_ylim
175 , xlab = my_xlab
176 , ylab = my_ylab
177 , main = main_label
178 , cex.main = main_cex
179 , cex = cex
180 , pch = 16
181 , col = my_col
182 )
183 low_x <- -0.7 * lim_x
184 high_x <- 0.7 * lim_x
185 if (projection == 1 && !cplot_x) {
186 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
187 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
188 }
189 if ( x_show_labels != "0" ) {
190 names(my_load_distal) <- tsv1$featureID
191 names(my_load_proximal) <- tsv1$featureID
192 if ( x_show_labels == "ALL" ) {
193 n_labels <- length(my_load_distal)
194 } else {
195 n_labels <- as.numeric(x_show_labels)
196 }
197 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
198 labels_to_show <- c(
199 names(head(sort(my_load_distal),n = n_labels))
200 , names(tail(sort(my_load_distal),n = n_labels))
201 )
202 labels <- unname(
203 sapply(
204 X = tsv1$featureID
205 , FUN = function(x) if( x %in% labels_to_show ) x else ""
206 ) 184 )
207 ) 185 low_x <- -0.7 * lim_x
208 x_text_offset <- 0.024 186 high_x <- 0.7 * lim_x
209 y_text_off <- 0.017 187 if (projection == 1 && !cplot_x) {
210 if (!cplot_x) { # S-plot 188 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
211 y_text_offset <- if (projection == 1) -y_text_off else y_text_off 189 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
212 } else { # C-plot 190 }
213 y_text_offset <- 191 if ( x_show_labels != "0" ) {
214 sapply( 192 names(my_load_distal) <- tsv1$featureID
215 X = (my_y > 0) 193 names(my_load_proximal) <- tsv1$featureID
216 , FUN = function(x) { if (x) y_text_off else -y_text_off } 194 if ( x_show_labels == "ALL" ) {
195 n_labels <- length(my_load_distal)
196 } else {
197 n_labels <- as.numeric(x_show_labels)
198 }
199 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
200 labels_to_show <- c(
201 names(head(sort(my_load_distal), n = n_labels))
202 , names(tail(sort(my_load_distal), n = n_labels))
217 ) 203 )
218 } 204 labels <- unname(
219 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { 205 sapply(
220 # print("str(x_arg)") 206 X = tsv1$featureID
221 # print(str(x_arg)) 207 , FUN = function(x) if ( x %in% labels_to_show ) x else ""
222 # print("str(y_arg)")
223 # print(str(y_arg))
224 # print("str(labels_arg)")
225 # print(str(labels_arg))
226 if (length(labels_arg) > 0) {
227 unique_slant <- unique(slant_arg)
228 if (length(unique_slant) == 1) {
229 text(
230 y = y_arg
231 , x = x_arg + x_text_offset
232 , cex = 0.4
233 , labels = labels_arg
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
235 , srt = slant_arg
236 , adj = 0 # left-justified
237 ) 208 )
209 )
210 x_text_offset <- 0.024
211 y_text_off <- 0.017
212 if (!cplot_x) {
213 # S-plot
214 y_text_offset <- if (projection == 1) -y_text_off else y_text_off
238 } else { 215 } else {
239 for (slant in unique_slant) { 216 # C-plot
240 text( 217 y_text_offset <-
241 y = y_arg[slant_arg == slant] 218 sapply(
242 , x = x_arg[slant_arg == slant] + x_text_offset 219 X = (my_y > 0)
243 , cex = 0.4 220 , FUN = function(x) {
244 , labels = labels_arg[slant_arg == slant] 221 if (x) y_text_off else -y_text_off
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels 222 }
246 , srt = slant 223 )
247 , adj = 0 # left-justified 224 }
225 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
226 if (length(labels_arg) > 0) {
227 unique_slant <- unique(slant_arg)
228 if (length(unique_slant) == 1) {
229 text(
230 y = y_arg
231 , x = x_arg + x_text_offset
232 , cex = 0.4
233 , labels = labels_arg
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
235 , srt = slant_arg
236 , adj = 0 # left-justified
237 )
238 } else {
239 for (slant in unique_slant) {
240 text(
241 y = y_arg[slant_arg == slant]
242 , x = x_arg[slant_arg == slant] + x_text_offset
243 , cex = 0.4
244 , labels = labels_arg[slant_arg == slant]
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
246 , srt = slant
247 , adj = 0 # left-justified
248 )
249 }
250 }
251 }
252 }
253 if (!cplot_x) {
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
255 } else {
256 my_slant <- sapply(
257 X = (my_y > 0)
258 , FUN = function(x) if (x) 2 else -2
259 ) * my_feature_label_slant
260 }
261 if (length(my_x) > 1) {
262 label_features(
263 x_arg = my_x [my_x > 0]
264 , y_arg = my_y [my_x > 0] - y_text_offset
265 , labels_arg = labels[my_x > 0]
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
267 )
268 if (!cplot_x) {
269 label_features(
270 x_arg = my_x [my_x < 0]
271 , y_arg = my_y [my_x < 0] + y_text_offset
272 , labels_arg = labels[my_x < 0]
273 , slant_arg = my_slant
248 ) 274 )
249 } 275 }
250 } 276 } else {
251 } 277 if (!cplot_x) {
252 } 278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant
253 if (!cplot_x) { 279 my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant 280 } else {
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant
282 my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset
283 }
284 label_features(
285 x_arg = my_x
286 , y_arg = my_y_arg
287 , labels_arg = labels
288 , slant_arg = my_slant
289 )
290 } # end if (length(my_x) > 1)
291 } # end if ( x_show_labels != "0" )
255 } else { 292 } else {
256 my_slant <- sapply( 293 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
257 X = (my_y > 0) 294 text(x=1, y=1, labels="no S-plot is possible")
258 , FUN = function(x) if (x) 2 else -2 295 } # end if (sum(is.infinte(my_xlim))==0)
259 ) * my_feature_label_slant 296 } else {
260 } 297 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
261 if (length(my_x) > 1) { 298 text(x=1, y=1, labels="no S-plot is possible")
262 label_features( 299 } # end if (length(plus_cor) != 0 && length(plus_cor) != 0)
263 x_arg = my_x [my_x > 0] 300 } # end action
264 , y_arg = my_y [my_x > 0] - y_text_offset 301 ) # end with( my_cor_vs_cov, action )
265 , labels_arg = labels[my_x > 0]
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
267 )
268 if (!cplot_x) {
269 label_features(
270 x_arg = my_x [my_x < 0]
271 , y_arg = my_y [my_x < 0] + y_text_offset
272 , labels_arg = labels[my_x < 0]
273 , slant_arg = my_slant
274 )
275 }
276 } else {
277 if (!cplot_x) {
278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant
279 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
280 } else {
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant
282 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
283 }
284 label_features(
285 x_arg = my_x
286 , y_arg = my_y_arg
287 , labels_arg = labels
288 , slant_arg = my_slant
289 )
290 }
291 }
292 }
293 )
294 return (my_cor_vs_cov) 302 return (my_cor_vs_cov)
295 } 303 } # end function do_s_plot
296 my_cor_vs_cov <- do_s_plot( 304 my_cor_vs_cov <- do_s_plot(
297 x_env = x_env 305 x_env = x_env
298 , predictor_projection_x = TRUE 306 , predictor_projection_x = TRUE
299 , cplot_x = FALSE 307 , cplot_x = FALSE
300 ) 308 )
301 typeVc <- c("correlation", # 1 309 typevc <- c("correlation", # 1
302 "outlier", # 2 310 "outlier", # 2
303 "overview", # 3 311 "overview", # 3
304 "permutation", # 4 312 "permutation", # 4
305 "predict-train", # 5 313 "predict-train", # 5
306 "predict-test", # 6 314 "predict-test", # 6
310 "x-variance", # 10 318 "x-variance", # 10
311 "xy-score", # 11 319 "xy-score", # 11
312 "xy-weight" # 12 320 "xy-weight" # 12
313 ) # [c(3,8,9)] # [c(4,3,8,9)] 321 ) # [c(3,8,9)] # [c(4,3,8,9)]
314 if ( length(my_oplsda@orthoVipVn) > 0 ) { 322 if ( length(my_oplsda@orthoVipVn) > 0 ) {
315 my_typevc <- typeVc[c(9,3,8)] 323 my_typevc <- typevc[c(9,3,8)]
316 } else { 324 } else {
317 my_typevc <- c("(dummy)","overview","(dummy)") 325 my_typevc <- c("(dummy)","overview","(dummy)")
318 } 326 }
319 my_ortho_cor_vs_cov_exists <- FALSE 327 my_ortho_cor_vs_cov_exists <- FALSE
320 for (my_type in my_typevc) { 328 for (my_type in my_typevc) {
321 if (my_type %in% typeVc) { 329 if (my_type %in% typevc) {
322 tryCatch({ 330 tryCatch({
323 if ( my_type != "x-loading" ) { 331 if ( my_type != "x-loading" ) {
324 plot( 332 plot(
325 x = my_oplsda 333 x = my_oplsda
326 , typeVc = my_type 334 , typeVc = my_type
327 , parCexN = 0.4 335 , parCexN = 0.4
328 , parDevNewL = FALSE 336 , parDevNewL = FALSE
329 , parLayL = TRUE 337 , parLayL = TRUE
330 , parEllipsesL = TRUE 338 , parEllipsesL = TRUE
331 ) 339 )
332 if (my_type == "overview") { 340 if (my_type == "overview") {
333 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) 341 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
334 title(sub = sub_label) 342 title(sub = sub_label)
335 } 343 }
336 } else { 344 } else {
337 my_ortho_cor_vs_cov <- do_s_plot( 345 my_ortho_cor_vs_cov <- do_s_plot(
338 x_env = x_env 346 x_env = x_env
339 , predictor_projection_x = FALSE 347 , predictor_projection_x = FALSE
340 , cplot_x = FALSE 348 , cplot_x = FALSE
341 ) 349 )
342 my_ortho_cor_vs_cov_exists <- TRUE 350 my_ortho_cor_vs_cov_exists <- TRUE
351 }
343 } 352 }
344 }, error = function(e) { 353 , error = function(e) {
345 x_progress( 354 x_progress(
346 sprintf( 355 sprintf(
347 "factor level %s or %s may have only one sample - %s" 356 "factor level %s or %s may have only one sample - %s"
348 , fctr_lvl_1 357 , fctr_lvl_1
349 , fctr_lvl_2 358 , fctr_lvl_2
358 } 367 }
359 cplot_p <- x_env$cplot_p 368 cplot_p <- x_env$cplot_p
360 cplot_o <- x_env$cplot_o 369 cplot_o <- x_env$cplot_o
361 if (cplot_p || cplot_o) { 370 if (cplot_p || cplot_o) {
362 if (cplot_p) { 371 if (cplot_p) {
363 do_s_plot( 372 if (!is.null(my_cor_vs_cov)) {
364 x_env = x_env 373 do_s_plot(
365 , predictor_projection_x = TRUE 374 x_env = x_env
366 , cplot_x = TRUE 375 , predictor_projection_x = TRUE
367 , cor_vs_cov_x = my_cor_vs_cov 376 , cplot_x = TRUE
368 ) 377 , cor_vs_cov_x = my_cor_vs_cov
378 )
379 } else {
380 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
381 text(x=1, y=1, labels="no predictor projection is possible")
382 }
369 did_plots <- 1 383 did_plots <- 1
370 } else { 384 } else {
371 did_plots <- 0 385 did_plots <- 0
372 } 386 }
373 if (cplot_o) { 387 if (cplot_o) {
416 return ( FALSE ) 430 return ( FALSE )
417 } 431 }
418 432
419 # extract parameters from the environment 433 # extract parameters from the environment
420 vrbl_metadata <- calc_env$vrbl_metadata 434 vrbl_metadata <- calc_env$vrbl_metadata
421 vrbl_metadata_names <- vrbl_metadata[,1] 435 vrbl_metadata_names <- vrbl_metadata[, 1]
422 smpl_metadata <- calc_env$smpl_metadata 436 smpl_metadata <- calc_env$smpl_metadata
423 data_matrix <- calc_env$data_matrix 437 data_matrix <- calc_env$data_matrix
424 pairSigFeatOnly <- calc_env$pairSigFeatOnly 438 pair_significant_features_only <- calc_env$pairSigFeatOnly
425 facC <- calc_env$facC 439 facC <- calc_env$facC
426 tesC <- calc_env$tesC 440 tesC <- calc_env$tesC
427 # extract the levels from the environment 441 # extract the levels from the environment
428 originalLevCSV <- levCSV <- calc_env$levCSV 442 originalLevCSV <- levCSV <- calc_env$levCSV
429 # matchingC is one of { "none", "wildcard", "regex" } 443 # matchingC is one of { "none", "wildcard", "regex" }
430 matchingC <- calc_env$matchingC 444 matchingC <- calc_env$matchingC
431 labelFeatures <- calc_env$labelFeatures 445 labelFeatures <- calc_env$labelFeatures
446 minCrossvalI <- as.integer(calc_env$min_crossval_i)
432 447
433 # arg/env checking 448 # arg/env checking
434 if (!(facC %in% names(smpl_metadata))) { 449 if (!(facC %in% names(smpl_metadata))) {
435 failure_action( 450 failure_action(
436 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" 451 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata"
444 459
445 rt <- vrbl_metadata$rt 460 rt <- vrbl_metadata$rt
446 names(rt) <- vrbl_metadata$variableMetadata 461 names(rt) <- vrbl_metadata$variableMetadata
447 rt_lookup <- function(feature) unname(rt[feature]) 462 rt_lookup <- function(feature) unname(rt[feature])
448 463
449 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) 464 # calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv)
450 salience_df <- calc_env$salience_df <- w4msalience( 465 salience_df <- calc_env$salience_df <- w4msalience(
451 data_matrix = data_matrix 466 data_matrix = data_matrix
452 , sample_class = smpl_metadata[,facC] 467 , sample_class = smpl_metadata[,facC]
453 , failure_action = failure_action 468 , failure_action = failure_action
454 ) 469 )
455 salience_tsv_action({ 470 salience_tsv_action({
456 my_df <- data.frame( 471 with (
457 featureID = salience_df$feature 472 salience_df
458 , salientLevel = salience_df$max_level 473 , {
459 , salientRCV = salience_df$salient_rcv 474 my_df <<- data.frame(
460 , salience = salience_df$salience 475 featureID = feature
461 , mz = mz_lookup(salience_df$feature) 476 , salientLevel = max_level
462 , rt = rt_lookup(salience_df$feature) 477 , salienceRCV = salience_rcv
463 ) 478 , relativeSalientDistance = relative_salient_distance
464 my_df[order(-my_df$salience),] 479 , salience = salience
480 , mz = mz_lookup(feature)
481 , rt = rt_lookup(feature)
482 )
483 })
484 my_df[order(-my_df$relativeSalientDistance),]
465 }) 485 })
466 486
467 # transform wildcards to regexen 487 # transform wildcards to regexen
468 if (matchingC == "wildcard") { 488 if (matchingC == "wildcard") {
469 # strsplit(x = "hello,wild,world", split = ",") 489 # strsplit(x = "hello,wild,world", split = ",")
571 ) 591 )
572 my_cor_cov <- do_detail_plot( 592 my_cor_cov <- do_detail_plot(
573 x_dataMatrix = my_matrix 593 x_dataMatrix = my_matrix
574 , x_predictor = predictor 594 , x_predictor = predictor
575 , x_is_match = TRUE 595 , x_is_match = TRUE
576 , x_algorithm = algoC 596 , x_algorithm = "nipals"
577 , x_prefix = if (pairSigFeatOnly) { 597 , x_prefix = if (pair_significant_features_only) {
578 "Significantly contrasting features" 598 "Significantly contrasting features"
579 } else { 599 } else {
580 "Significant features" 600 "Significant features"
581 } 601 }
582 , x_show_labels = labelFeatures 602 , x_show_labels = labelFeatures
583 , x_progress = progress_action 603 , x_progress = progress_action
584 , x_crossval_i = min(7, length(chosen_samples)) 604 , x_crossval_i = min(minCrossvalI, length(chosen_samples))
585 , x_env = calc_env 605 , x_env = calc_env
586 ) 606 )
587 if ( is.null(my_cor_cov) ) { 607 if ( is.null(my_cor_cov) ) {
588 progress_action("NOTHING TO PLOT") 608 progress_action("NOTHING TO PLOT")
589 } else { 609 } else {
638 } else { 658 } else {
639 1 659 1
640 } 660 }
641 ) 661 )
642 col_selector <- vrbl_metadata_names[ 662 col_selector <- vrbl_metadata_names[
643 if ( pairSigFeatOnly ) fully_significant else overall_significant 663 if (pair_significant_features_only) fully_significant else overall_significant
644 ] 664 ]
645 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] 665 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
646 my_cor_cov <- do_detail_plot( 666 my_cor_cov <- do_detail_plot(
647 x_dataMatrix = my_matrix 667 x_dataMatrix = my_matrix
648 , x_predictor = predictor 668 , x_predictor = predictor
649 , x_is_match = is_match 669 , x_is_match = is_match
650 , x_algorithm = algoC 670 , x_algorithm = "nipals"
651 , x_prefix = if (pairSigFeatOnly) { 671 , x_prefix = if (pair_significant_features_only) {
652 "Significantly contrasting features" 672 "Significantly contrasting features"
653 } else { 673 } else {
654 "Significant features" 674 "Significant features"
655 } 675 }
656 , x_show_labels = labelFeatures 676 , x_show_labels = labelFeatures
657 , x_progress = progress_action 677 , x_progress = progress_action
658 , x_crossval_i = min(7, length(chosen_samples)) 678 , x_crossval_i = min(minCrossvalI, length(chosen_samples))
659 , x_env = calc_env 679 , x_env = calc_env
660 ) 680 )
661 if ( is.null(my_cor_cov) ) { 681 if ( is.null(my_cor_cov) ) {
662 progress_action("NOTHING TO PLOT.") 682 progress_action("NOTHING TO PLOT.")
663 } else { 683 } else {
679 ) 699 )
680 } 700 }
681 } 701 }
682 } 702 }
683 } 703 }
684 } else { # tesC == "none" 704 } else {
705 # tesC == "none"
685 # find all the levels for factor facC in sampleMetadata 706 # find all the levels for factor facC in sampleMetadata
686 level_union <- unique(sort(smpl_metadata_facC)) 707 level_union <- unique(sort(smpl_metadata_facC))
687 # identify the selected levels for factor facC from sampleMetadata 708 # identify the selected levels for factor facC from sampleMetadata
688 level_include <- sapply(X = level_union, FUN = isLevelSelected) 709 level_include <- sapply(X = level_union, FUN = isLevelSelected)
689 # discard the non-selected levels for factor facC 710 # discard the non-selected levels for factor facC
697 , FUN = function(x) { 718 , FUN = function(x) {
698 fctr_lvl_1 <- x[1] 719 fctr_lvl_1 <- x[1]
699 fctr_lvl_2 <- { 720 fctr_lvl_2 <- {
700 if ( fctr_lvl_1 %in% completed ) 721 if ( fctr_lvl_1 %in% completed )
701 return("DUMMY") 722 return("DUMMY")
702 # strF(completed)
703 completed <<- c(completed, fctr_lvl_1) 723 completed <<- c(completed, fctr_lvl_1)
704 setdiff(level_union, fctr_lvl_1) 724 setdiff(level_union, fctr_lvl_1)
705 } 725 }
706 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 726 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
707 fctr_lvl_2 <- "other" 727 fctr_lvl_2 <- "other"
728 ) 748 )
729 my_cor_cov <- do_detail_plot( 749 my_cor_cov <- do_detail_plot(
730 x_dataMatrix = my_matrix 750 x_dataMatrix = my_matrix
731 , x_predictor = predictor 751 , x_predictor = predictor
732 , x_is_match = is_match 752 , x_is_match = is_match
733 , x_algorithm = algoC 753 , x_algorithm = "nipals"
734 , x_prefix = "Features" 754 , x_prefix = "Features"
735 , x_show_labels = labelFeatures 755 , x_show_labels = labelFeatures
736 , x_progress = progress_action 756 , x_progress = progress_action
737 , x_crossval_i = min(7, length(chosen_samples)) 757 , x_crossval_i = min(minCrossvalI, length(chosen_samples))
738 , x_env = calc_env 758 , x_env = calc_env
739 ) 759 )
740 if ( is.null(my_cor_cov) ) { 760 if ( is.null(my_cor_cov) ) {
741 progress_action("NOTHING TO PLOT...") 761 progress_action("NOTHING TO PLOT...")
742 } else { 762 } else {
781 ) 801 )
782 my_cor_cov <- do_detail_plot( 802 my_cor_cov <- do_detail_plot(
783 x_dataMatrix = my_matrix 803 x_dataMatrix = my_matrix
784 , x_predictor = predictor 804 , x_predictor = predictor
785 , x_is_match = is_match 805 , x_is_match = is_match
786 , x_algorithm = algoC 806 , x_algorithm = "nipals"
787 , x_prefix = "Features" 807 , x_prefix = "Features"
788 , x_show_labels = labelFeatures 808 , x_show_labels = labelFeatures
789 , x_progress = progress_action 809 , x_progress = progress_action
790 , x_crossval_i = min(7, length(chosen_samples)) 810 , x_crossval_i = min(minCrossvalI, length(chosen_samples))
791 , x_env = calc_env 811 , x_env = calc_env
792 ) 812 )
793 if ( is.null(my_cor_cov) ) { 813 if ( is.null(my_cor_cov) ) {
794 progress_action("NOTHING TO PLOT.....") 814 progress_action("NOTHING TO PLOT.....")
795 } else { 815 } else {
815 } 835 }
816 } 836 }
817 if (!did_plot) { 837 if (!did_plot) {
818 failure_action( 838 failure_action(
819 sprintf( 839 sprintf(
820 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" 840 "Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?"
821 , facC, originalLevCSV)) 841 , facC, originalLevCSV))
822 return ( FALSE ) 842 return ( FALSE )
823 } 843 }
824 return ( TRUE ) 844 return ( TRUE )
825 } 845 }
832 cor_vs_cov <- function( 852 cor_vs_cov <- function(
833 matrix_x 853 matrix_x
834 , ropls_x 854 , ropls_x
835 , predictor_projection_x = TRUE 855 , predictor_projection_x = TRUE
836 , x_progress = print 856 , x_progress = print
857 , x_env
837 ) { 858 ) {
838 tryCatch({ 859 tryCatch({
839 return( 860 return(
840 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) 861 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env)
841 ) 862 )
842 }, error = function(e) { 863 }
864 , error = function(e) {
843 x_progress( 865 x_progress(
844 sprintf( 866 sprintf(
845 "cor_vs_cov fatal error - %s" 867 "cor_vs_cov fatal error - %s"
846 , as.character(e) # e$message 868 , as.character(e) # e$message
847 ) 869 )
849 return ( NULL ) 871 return ( NULL )
850 }) 872 })
851 } 873 }
852 874
853 cor_vs_cov_try <- function( 875 cor_vs_cov_try <- function(
854 matrix_x 876 matrix_x # rows are samples; columns, features
855 , ropls_x 877 , ropls_x # an instance of ropls::opls
856 , predictor_projection_x = TRUE 878 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection
857 , x_progress = print 879 , x_progress = print # function to produce progress and error messages
880 , x_env
858 ) { 881 ) {
882 my_matrix_x <- matrix_x
883 my_matrix_x[my_matrix_x==0] <- NA
884 fdr_features <- x_env$fdr_features
885
859 x_class <- class(ropls_x) 886 x_class <- class(ropls_x)
860 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 887 if ( !( as.character(x_class) == "opls" ) ) {
861 stop( 888 stop(
862 paste( 889 paste(
863 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " 890 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
864 , as.character(x_class) 891 , as.character(x_class)
865 ) 892 )
866 ) 893 )
867 } 894 }
895 if ( !ropls_x@suppLs$algoC == "nipals" ) {
896 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
897 stop(
898 paste(
899 "cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than "
900 , ropls_x@suppLs$algoC
901 )
902 )
903 }
868 result <- list() 904 result <- list()
869 result$projection <- projection <- if (predictor_projection_x) 1 else 2 905 result$projection <- projection <- if (predictor_projection_x) 1 else 2
870 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS 906
871 if ( ropls_x@suppLs$algoC == "nipals") { 907 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
872 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 908 # (and not from the supplement despite the statement that, for the NIPALS algorithm,
873 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) 909 # the equations from the supplement should be used) because of the definition of the
874 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) 910 # Pearson/Galton coefficient of correlation is defined as
875 if (predictor_projection_x) 911 # $$
876 score_matrix <- ropls_x@scoreMN 912 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}
877 else 913 # $$
878 score_matrix <- ropls_x@orthoScoreMN 914 # as described (among other places) on Wikipedia at
879 score_matrix_transposed <- t(score_matrix) 915 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population
880 score_matrix_magnitude <- mag(score_matrix) 916 # The equations in the supplement said to use, for the predictive component t1,
881 result$covariance <- 917 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))}
882 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) 918 # but the results that I got were dramatically different from published results for S-PLOTs;
883 result$correlation <- 919 # perhaps my data are not centered exactly the same way that theirs were.
884 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) 920 # The correlations calculated here are in agreement with those calculated with the code from
921 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf
922
923
924 # count the features/variables (one column for each sample)
925 # count the features/variables (one column for each sample)
926 n_features <- ncol(my_matrix_x)
927 all_n_features <- x_env$fdr_features
928 if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) {
929 all_n_features <- as.integer(all_n_features)
885 } else { 930 } else {
886 # WARNING - untested code - I don't have test data to exercise this branch 931 all_n_features <- n_features
887 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 932 }
888 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F 933 fdr_n_features <- max(n_features, all_n_features)
889 if (predictor_projection_x) 934 # print("n_features")
890 score_matrix <- ropls_x@scoreMN 935 # print(n_features)
891 else 936
892 score_matrix <- ropls_x@orthoScoreMN 937 # count the samples/observations (one row for each sample)
893 score_matrix_transposed <- t(score_matrix) 938 n_observations <- nrow(my_matrix_x)
894 cov_divisor <- nrow(matrix_x) - 1 939
895 result$covariance <- sapply( 940 # choose whether to plot the predictive score vector or orthogonal score vector
896 X = 1:ncol(matrix_x) 941 if (predictor_projection_x)
897 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor 942 score_vector <- ropls_x@scoreMN
943 else
944 score_vector <- ropls_x@orthoScoreMN
945
946 # compute the covariance of each feature with the score vector
947 result$covariance <-
948 sapply(
949 X = 1:n_features
950 , FUN = function(i) {
951 cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs")
952 }
898 ) 953 )
899 score_sd <- sapply( 954 # access covariance by feature name
900 X = 1:ncol(score_matrix) 955 names(result$covariance) <- colnames(my_matrix_x)
901 , FUN = function(x) sd(score_matrix[,x]) 956
957 # compute the correlation of each feature with the score vector
958 result$correlation <-
959 sapply(
960 X = 1:n_features
961 , FUN = function(i) {
962 cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs")
963 }
902 ) 964 )
903 # xSdVn - Numerical vector: variable standard deviations of the 'x' matrix 965 # access correlation by feature name
904 xSdVn <- ropls_x@xSdVn 966 names(result$correlation) <- colnames(my_matrix_x)
905 result$correlation <- sapply( 967
906 X = 1:ncol(matrix_x) 968 # eliminate NAs in either correlation or covariance
907 , FUN = function(x) { 969 nas <- is.na(result$correlation) | is.na(result$covariance)
908 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) 970 featureID <- names(ropls_x@vipVn)
909 } 971 featureID <- featureID[!nas]
910 ) 972 result$correlation <- result$correlation[!nas]
911 } 973 result$covariance <- result$covariance[!nas]
912 result$correlation <- result$correlation[ 1, , drop = TRUE ] 974 n_features <- length(featureID)
913 result$covariance <- result$covariance [ 1, , drop = TRUE ] 975
914 976 correl_pci <- lapply(
915 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 977 X = 1:n_features
978 , FUN = function(i) {
979 correl.ci(
980 r = result$correlation[i]
981 , n_obs = n_observations
982 , n_vars = fdr_n_features
983 )
984 }
985 )
986 result$p_value_raw <- sapply(
987 X = 1:n_features
988 , FUN = function(i) correl_pci[[i]]$p.value.raw
989 )
990 result$p_value_raw[is.na(result$p_value_raw)] <- 1.0
991 result$p_value <- sapply(
992 X = 1:n_features
993 , FUN = function(i) correl_pci[[i]]$p.value
994 )
995 result$p_value[is.na(result$p_value)] <- 1.0
996 result$ci_lower <- sapply(
997 X = 1:n_features
998 , FUN = function(i) correl_pci[[i]]$CI["lower"]
999 )
1000 result$ci_upper <- sapply(
1001 X = 1:n_features
1002 , FUN = function(i) correl_pci[[i]]$CI["upper"]
1003 )
1004
1005
1006 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627)
916 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) 1007 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.)
917 result$vip4p <- as.numeric(ropls_x@vipVn) 1008 result$vip4p <- as.numeric(ropls_x@vipVn)[!nas]
918 result$vip4o <- as.numeric(ropls_x@orthoVipVn) 1009 result$vip4o <- as.numeric(ropls_x@orthoVipVn)[!nas]
919 result$loadp <- as.numeric(ropls_x@loadingMN) 1010 if (length(result$vip4o) == 0) result$vip4o <- NA
920 result$loado <- as.numeric(ropls_x@orthoLoadingMN) 1011 # extract the loadings
1012 result$loadp <- as.numeric(ropls_x@loadingMN)[!nas]
1013 result$loado <- as.numeric(ropls_x@orthoLoadingMN)[!nas]
921 # get the level names 1014 # get the level names
922 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) 1015 level_names <- sort(levels(as.factor(ropls_x@suppLs$y)))
923 fctr_lvl_1 <- level_names[1] 1016 fctr_lvl_1 <- level_names[1]
924 fctr_lvl_2 <- level_names[2] 1017 fctr_lvl_2 <- level_names[2]
925 feature_count <- length(ropls_x@vipVn) 1018 result$level1 <- rep.int(x = fctr_lvl_1, times = n_features)
926 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) 1019 result$level2 <- rep.int(x = fctr_lvl_2, times = n_features)
927 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count)
928 superresult <- list()
929 if (length(result$vip4o) == 0) result$vip4o <- NA
930 greaterLevel <- sapply( 1020 greaterLevel <- sapply(
931 X = result$correlation 1021 X = result$correlation
932 , FUN = function(my_corr) 1022 , FUN = function(my_corr) {
933 tryCatch({ 1023 tryCatch({
934 if ( is.nan( my_corr ) ) { 1024 if ( is.na(my_corr) || ! is.numeric( my_corr ) ) {
935 print("my_corr is NaN") 1025 NA
936 NA
937 } else { 1026 } else {
938 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 1027 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
939 } 1028 }
940 }, error = function(e) { 1029 }
1030 , error = function(e) {
1031 print(my_corr)
941 x_progress( 1032 x_progress(
942 sprintf( 1033 sprintf(
943 "cor_vs_cov -> sapply: error - substituting NA - %s" 1034 "cor_vs_cov -> sapply: error - substituting NA - %s"
944 , as.character(e) 1035 , as.character(e)
945 ) 1036 )
946 ) 1037 )
947 NA 1038 NA
948 }) 1039 }
1040 )
1041 }
949 ) 1042 )
950 1043
951 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 1044 # build a data frame to hold the content for the tab-separated values file
952 featureID <- names(ropls_x@vipVn)
953 greaterLevel <- greaterLevel[featureID]
954 result$correlation <- result$correlation[featureID]
955 result$covariance <- result$covariance[featureID]
956 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
957
958 tsv1 <- data.frame( 1045 tsv1 <- data.frame(
959 featureID = featureID 1046 featureID = featureID
960 , factorLevel1 = result$level1 1047 , factorLevel1 = result$level1
961 , factorLevel2 = result$level2 1048 , factorLevel2 = result$level2
962 , greaterLevel = greaterLevel 1049 , greaterLevel = greaterLevel
963 , projection = result$projection 1050 , projection = result$projection
964 , correlation = result$correlation 1051 , correlation = result$correlation
965 , covariance = result$covariance 1052 , covariance = result$covariance
966 , vip4p = result$vip4p 1053 , vip4p = result$vip4p
967 , vip4o = result$vip4o 1054 , vip4o = result$vip4o
968 , loadp = result$loadp 1055 , loadp = result$loadp
969 , loado = result$loado 1056 , loado = result$loado
970 , row.names = NULL 1057 , cor_p_val_raw = result$p_value_raw
1058 , cor_p_value = result$p_value
1059 , cor_ci_lower = result$ci_lower
1060 , cor_ci_upper = result$ci_upper
971 ) 1061 )
972 tsv1 <- tsv1[!is.na(tsv1$correlation),] 1062 rownames(tsv1) <- tsv1$featureID
973 tsv1 <- tsv1[!is.na(tsv1$covariance),] 1063
974 superresult$tsv1 <- tsv1 1064 # build the superresult, i.e., the result returned by this function
975 rownames(superresult$tsv1) <- tsv1$featureID 1065 superresult <- list()
976 superresult$projection <- result$projection 1066 superresult$projection <- result$projection
977 superresult$covariance <- result$covariance 1067 superresult$covariance <- result$covariance
978 superresult$correlation <- result$correlation 1068 superresult$correlation <- result$correlation
979 superresult$vip4p <- result$vip4p 1069 superresult$vip4p <- result$vip4p
980 superresult$vip4o <- result$vip4o 1070 superresult$vip4o <- result$vip4o
981 superresult$loadp <- result$loadp 1071 superresult$loadp <- result$loadp
982 superresult$loado <- result$loado 1072 superresult$loado <- result$loado
1073 superresult$cor_p_value <- tsv1$cor_p_value
983 superresult$details <- result 1074 superresult$details <- result
984 result$superresult <- superresult 1075
985 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways 1076 # remove any rows having NA for covariance or correlation
986 result$oplsda <- ropls_x 1077 tsv1 <- tsv1[!is.na(tsv1$correlation),]
987 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways 1078 tsv1 <- tsv1[!is.na(tsv1$covariance),]
1079 superresult$tsv1 <- tsv1
1080
1081 # # I did not include these but left them commentd out in case future
1082 # # consumers of this routine want to use it in currently unanticipated ways
1083 # result$superresult <- superresult
1084 # result$oplsda <- ropls_x
1085 # result$predictor <- ropls_x@suppLs$y
1086
988 return (superresult) 1087 return (superresult)
989 } 1088 }
990 1089
991 # vim: sw=2 ts=2 et : 1090 # Code for correl.ci was adapted from correl function from:
1091 # @book{
1092 # Tsagris_2018,
1093 # author = {Tsagris, Michail},
1094 # year = {2018},
1095 # link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R},
1096 # title = {Multivariate data analysis in R}
1097 # }
1098 # which follows
1099 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition
1100
1101 correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) {
1102 ## r is the calculated correlation coefficient for n_obs pairs of observations of one variable
1103 ## a is the significance level
1104 ## rho is the hypothesised correlation
1105 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho
1106 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1
1107 se <- (1 - r^2)/sqrt(n_obs - 3) ## standard error for Fisher's z-transformation of Ho
1108 test <- (zh1 - zh0)/se ### test statistic
1109 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value
1110 pvalue.adj <- p.adjust(p = pvalue, method = "BY", n = n_vars)
1111 z_L <- zh1 - qnorm(1 - a/2)*se
1112 z_h <- zh1 + qnorm(1 - a/2)*se
1113 fish_l <- tanh(z_L) # (exp(2*z_l)-1)/(exp(2*z_l)+1), i.e., lower confidence limit
1114 fish_h <- tanh(z_h) # (exp(2*z_h)-1)/(exp(2*z_h)+1), i.e., upper confidence limit
1115 ci <- c(fish_l, fish_h)
1116 names(ci) <- c("lower", "upper")
1117 list(
1118 correlation = r
1119 , p.value.raw = pvalue
1120 , p.value = pvalue.adj
1121 , CI = ci
1122 )
1123 }
1124
1125 # vim: sw=2 ts=2 et ai :