Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova_script.Rmd @ 7:d728198f1ba5 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 9a0fa6d0f9aadc069a5551a54da6daf307885637"
author | eschen42 |
---|---|
date | Tue, 15 Mar 2022 00:35:16 +0000 |
parents | c1403d18c189 |
children | 4deacfee76ef |
comparison
equal
deleted
inserted
replaced
6:922d309640db | 7:d728198f1ba5 |
---|---|
1 --- | 1 --- |
2 title: "Quant Data Processing Script" | 2 title: "MaxQuant Phospho-Proteomic Enrichment Pipeline ANOVA" |
3 author: "Larry Cheng; Art Eschenlauer" | 3 author: "Larry Cheng; Art Eschenlauer" |
4 date: "May 28, 2018; Nov 16, 2021" | 4 date: "May 28, 2018; Nov 16, 2021" |
5 output: | 5 output: |
6 html_document: default | |
7 pdf_document: default | 6 pdf_document: default |
8 params: | 7 params: |
9 inputFile: "Upstream_Map_pST_outputfile_STEP4.txt" | 8 inputFile: "test-data/test_input_for_anova.tabular" |
10 alphaFile: "alpha_levels.txt" | 9 alphaFile: "test-data/alpha_levels.tabular" |
11 firstDataColumn: "Intensity" | 10 firstDataColumn: "Intensity" |
12 imputationMethod: !r c("group-median","median","mean","random")[4] | 11 imputationMethod: !r c("group-median", "median", "mean", "random")[1] |
13 meanPercentile: 1 | 12 meanPercentile: 1 |
14 sdPercentile: 0.2 | 13 sdPercentile: 0.2 |
15 regexSampleNames: "\\.(\\d+)[A-Z]$" | 14 regexSampleNames: "\\.(\\d+)[A-Z]$" |
16 regexSampleGrouping: "(\\d+)" | 15 regexSampleGrouping: "(\\d+)" |
17 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" | 16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" |
18 --- | 17 --- |
19 ```{r setup, include=FALSE} | 18 ```{r setup, include = FALSE} |
20 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 | 19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 |
21 knitr::opts_chunk$set(echo = FALSE, fig.dim=c(9,10)) | 20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) |
21 | |
22 ### FUNCTIONS | |
23 | |
24 #ANOVA filter function | |
25 anova_func <- function(x, grouping_factor) { | |
26 x_aov <- aov(as.numeric(x) ~ grouping_factor) | |
27 pvalue <- summary(x_aov)[[1]][["Pr(>F)"]][1] | |
28 pvalue | |
29 } | |
22 ``` | 30 ``` |
23 | 31 |
24 ## Purpose: | 32 ## Purpose: |
25 Perform imputation of missing values, quantile normalization, and ANOVA. | 33 Perform imputation of missing values, quantile normalization, and ANOVA. |
26 | 34 |
27 <!-- | 35 <!-- |
28 ## Variables to change for each input file | 36 ## Variables to change for each input file |
29 --> | 37 --> |
30 ```{r include = FALSE} | 38 ```{r include = FALSE} |
31 #Input Filename | 39 # Input Filename |
32 inputFile <- params$inputFile | 40 input_file <- params$inputFile |
33 | 41 |
34 #First data column - ideally, this could be detected via regexSampleNames, but for now leave it as is. | 42 # First data column - ideally, this could be detected via regexSampleNames, |
35 firstDataColumn <- params$firstDataColumn | 43 # but for now leave it as is. |
36 FDC_is_integer <- TRUE | 44 first_data_column <- params$firstDataColumn |
37 firstDataColumn <- withCallingHandlers( | 45 fdc_is_integer <- TRUE |
38 as.integer(firstDataColumn) | 46 first_data_column <- withCallingHandlers( |
39 , warning = function(w) FDC_is_integer <<- FALSE | 47 as.integer(first_data_column) |
48 , warning = function(w) fdc_is_integer <<- FALSE | |
40 ) | 49 ) |
41 if (FALSE == FDC_is_integer) { | 50 if (FALSE == fdc_is_integer) { |
42 firstDataColumn <- params$firstDataColumn | 51 first_data_column <- params$firstDataColumn |
43 } | 52 } |
44 | 53 |
45 #False discovery rate adjustment for ANOVA (Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05) | 54 # False discovery rate adjustment for ANOVA |
46 valFDR <- read.table(file = params$alphaFile, sep = "\t", header=F, quote="")[,1] | 55 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 |
56 val_fdr <- | |
57 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1] | |
47 | 58 |
48 #Imputed Data filename | 59 #Imputed Data filename |
49 imputedDataFilename <- params$imputedDataFilename | 60 imputed_data_filename <- params$imputedDataFilename |
50 | 61 |
51 #ANOVA data filename | 62 #ANOVA data filename |
52 ``` | 63 ``` |
53 | 64 |
54 ```{r include = FALSE} | 65 ```{r echo = FALSE} |
55 #Imputation method, should be one of c("random","group-median","median","mean") | 66 # Imputation method, should be one of |
56 imputationMethod <- params$imputationMethod | 67 # "random", "group-median", "median", or "mean" |
57 | 68 imputation_method <- params$imputationMethod |
58 #Selection of percentile of logvalue data to set the mean for random number generation when using random imputation | 69 |
59 meanPercentile <- params$meanPercentile / 100.0 | 70 # Selection of percentile of logvalue data to set the mean for random number |
60 | 71 # generation when using random imputation |
61 #deviation adjustment-factor for random values; real number. | 72 mean_percentile <- params$meanPercentile / 100.0 |
62 sdPercentile <- params$sdPercentile | 73 |
63 | 74 # deviation adjustment-factor for random values; real number. |
64 #Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$" | 75 sd_percentile <- params$sdPercentile |
65 regexSampleNames <- params$regexSampleNames | 76 |
66 | 77 # Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$" |
67 #Regular expression to extract Sample Grouping from Sample Name (if error occurs, compare sampleNumbers and tempMatches to see if groupings/pairs line up) | 78 regex_sample_names <- params$regexSampleNames |
68 # e.g., "(\\d+)" | 79 |
69 regexSampleGrouping <- params$regexSampleGrouping | 80 # Regular expression to extract Sample Grouping from Sample Name; |
70 | 81 # if error occurs, compare sample_factor_levels and temp_matches |
71 ``` | 82 # to see if groupings/pairs line up |
72 | 83 # e.g., "(\\d+)" |
73 | 84 regex_sample_grouping <- params$regexSampleGrouping |
74 ```{r include = FALSE} | 85 |
75 ### FUNCTIONS | 86 ``` |
76 | 87 |
77 #ANOVA filter function | 88 ```{r echo = FALSE} |
78 anovaFunc <- function(x, groupingFactor) { | 89 ### READ DATA |
79 x.aov = aov(as.numeric(x) ~ groupingFactor) | 90 |
80 pvalue = summary(x.aov)[[1]][["Pr(>F)"]][1] | 91 library(data.table) |
81 pvalue | 92 |
82 } | 93 # read.table reads a file in table format and creates a data frame from it. |
83 ``` | 94 # - note that `quote = ""` means that quotation marks are treated literally. |
84 | 95 full_data <- read.table( |
85 | 96 file = input_file, |
97 sep = "\t", | |
98 header = T, | |
99 quote = "", | |
100 check.names = FALSE | |
101 ) | |
102 ``` | |
103 | |
104 ### Column names from input file | |
105 | |
106 ```{r echo = FALSE, results = 'markup'} | |
107 print(colnames(full_data)) | |
108 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) | |
109 cat(sprintf("First data column: %d\n", min(data_column_indices))) | |
110 cat(sprintf("Last data column: %d\n", max(data_column_indices))) | |
111 ``` | |
112 | |
113 ```{r echo = FALSE, results = 'asis'} | |
114 cat("\\newpage\n") | |
115 ``` | |
86 | 116 |
87 ### Checking that log-transformed sample distributions are similar: | 117 ### Checking that log-transformed sample distributions are similar: |
88 ```{r echo=FALSE} | 118 |
89 | 119 ```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'} |
90 library(data.table) | 120 |
91 | 121 if (FALSE == fdc_is_integer) { |
92 # read.table reads a file in table format and creates a data frame from it. | 122 |
93 # - note that `quote=""` means that quotation marks are treated literally. | 123 if (length(data_column_indices) > 0) { |
94 fullData <- read.table(file = inputFile, sep = "\t", header=T, quote="", check.names=FALSE) | 124 first_data_column <- data_column_indices[1] |
95 print(colnames(fullData)) | |
96 #head(fullData) | |
97 | |
98 if (FALSE == FDC_is_integer) { | |
99 dataColumnIndices <- grep(firstDataColumn, names(fullData), perl=TRUE) | |
100 str(dataColumnIndices) | |
101 if (length(dataColumnIndices) > 0) { | |
102 firstDataColumn <- dataColumnIndices[1] | |
103 } else { | 125 } else { |
104 stop(paste("failed to convert firstDataColumn:", firstDataColumn)) | 126 stop(paste("failed to convert firstDataColumn:", first_data_column)) |
105 } | 127 } |
106 } | 128 } |
107 | 129 |
108 quantData0 <- fullData[firstDataColumn:length(fullData)] | 130 quant_data0 <- full_data[first_data_column:length(full_data)] |
109 quantData <- fullData[firstDataColumn:length(fullData)] | 131 quant_data <- full_data[first_data_column:length(full_data)] |
110 quantData[quantData==0] <- NA #replace 0 with NA | 132 quant_data[quant_data == 0] <- NA #replace 0 with NA |
111 quantDataLog <- log10(quantData) | 133 quant_data_log <- log10(quant_data) |
112 | 134 |
113 rownames(quantDataLog) <- fullData$Phosphopeptide | 135 rownames(quant_data_log) <- full_data$Phosphopeptide |
114 | 136 |
115 summary(quantDataLog) | 137 # data visualization |
116 | |
117 #data visualization | |
118 old_par <- par( | 138 old_par <- par( |
119 mai=par("mai") + c(0.5,0,0,0) | 139 mai = par("mai") + c(0.5, 0, 0, 0) |
120 ) | 140 ) |
121 boxplot( | 141 boxplot( |
122 quantDataLog | 142 quant_data_log |
123 , las=2 | 143 , las = 2 |
124 ) | 144 ) |
125 par(old_par) | 145 par(old_par) |
126 | 146 |
127 quantDataLog_stack <- stack(quantDataLog) | 147 |
128 ``` | 148 |
129 | 149 cat("\\newline\n") |
130 ```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)} | 150 cat("\\newline\n") |
151 | |
152 ``` | |
153 | |
154 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4), warning = FALSE} | |
155 quant_data_log_stack <- stack(quant_data_log) | |
131 library(ggplot2) | 156 library(ggplot2) |
132 ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind)) | 157 ggplot( |
158 quant_data_log_stack, | |
159 aes(x = values)) + geom_density(aes(group = ind, colour = ind)) | |
133 ``` | 160 ``` |
134 | 161 |
135 ### Globally, are phosphopeptide intensities are approximately unimodal? | 162 ### Globally, are phosphopeptide intensities are approximately unimodal? |
136 ```{r echo = FALSE,fig.align="left", fig.dim=c(9,5)} | 163 |
137 | 164 <!-- |
138 # ref for bquote particularly and plotting math expressions generally: | 165 # ref for bquote below particularly and plotting math expressions generally: |
139 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ | 166 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ |
140 | 167 --> |
141 #identify the location of missing values | 168 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)} |
142 fin <- is.finite(as.numeric(as.matrix(quantDataLog))) | 169 |
143 | 170 # identify the location of missing values |
144 logvalues <- as.numeric(as.matrix(quantDataLog))[fin] | 171 fin <- is.finite(as.numeric(as.matrix(quant_data_log))) |
172 | |
173 logvalues <- as.numeric(as.matrix(quant_data_log))[fin] | |
145 plot( | 174 plot( |
146 density(logvalues) | 175 density(logvalues), |
147 , main = bquote("Smoothed estimated probability density vs." ~ log[10](intensity)) | 176 main = bquote( |
148 , xlab = bquote(log[10](intensity)) | 177 "Smoothed estimated probability density vs." ~ log[10](intensity)), |
149 ) | 178 xlab = bquote(log[10](intensity)) |
179 ) | |
150 hist( | 180 hist( |
151 x = as.numeric(as.matrix(quantDataLog)) | 181 x = as.numeric(as.matrix(quant_data_log)) |
152 , breaks = 100 | 182 , breaks = 100 |
153 , main = bquote("Frequency vs." ~ log[10](intensity)) | 183 , main = bquote("Frequency vs." ~ log[10](intensity)) |
154 , xlab = bquote(log[10](intensity)) | 184 , xlab = bquote(log[10](intensity)) |
155 ) | 185 ) |
156 ``` | 186 ``` |
157 | 187 |
158 <!-- | |
159 ## Impute missing values | |
160 --> | |
161 | |
162 ### Distribution of standard deviations of phosphopeptides, ignoring missing values: | 188 ### Distribution of standard deviations of phosphopeptides, ignoring missing values: |
163 | 189 |
164 ```{r echo = FALSE, fig.align="left", fig.dim=c(9,5)} | 190 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5)} |
165 #determine quantile | 191 # determine quantile |
166 q1 <- quantile(logvalues, probs = meanPercentile)[1] | 192 q1 <- quantile(logvalues, probs = mean_percentile)[1] |
167 | 193 |
168 #determine standard deviation of quantile to impute | 194 # determine standard deviation of quantile to impute |
169 sd_finite <- function(x) { | 195 sd_finite <- function(x) { |
170 ok <- is.finite(x) | 196 ok <- is.finite(x) |
171 sd(x[ok]) * sdPercentile | 197 sd(x[ok]) * sd_percentile |
172 } | 198 } |
173 sds <- apply(quantDataLog, 1, sd_finite) # 1 = row of matrix (ie, phosphopeptide) | 199 # 1 = row of matrix (ie, phosphopeptide) |
200 sds <- apply(quant_data_log, 1, sd_finite) | |
174 plot( | 201 plot( |
175 density(sds, na.rm=T) | 202 density(sds, na.rm = T) |
176 , main="Smoothed estimated probability density vs. std. deviation" | 203 , main = "Smoothed estimated probability density vs. std. deviation" |
177 , sub="(probability estimation made with Gaussian smoothing)" | 204 , sub = "(probability estimation made with Gaussian smoothing)" |
178 ) | 205 ) |
179 | 206 |
180 m1 <- median(sds, na.rm=T) #sd to be used is the median sd | 207 m1 <- median(sds, na.rm = T) #sd to be used is the median sd |
181 | 208 |
182 ``` | 209 ``` |
183 | 210 |
184 | 211 |
185 | 212 |
186 <!-- | 213 <!-- |
187 The number of missing values are: | 214 The number of missing values are: |
188 --> | 215 --> |
189 ```{r echo=FALSE} | 216 ```{r echo = FALSE} |
190 #Determine number of cells to impute | 217 #Determine number of cells to impute |
191 temp <- quantData[is.na(quantData)] | 218 temp <- quant_data[is.na(quant_data)] |
192 | 219 |
193 #Determine number of values to impute | 220 #Determine number of values to impute |
194 NoToImpute <- length(temp) | 221 number_to_impute <- length(temp) |
195 ``` | 222 ``` |
196 | 223 |
197 <!-- | 224 <!-- |
198 % of values that are missing: | 225 % of values that are missing: |
199 --> | 226 --> |
200 ```{r echo=FALSE} | 227 ```{r echo = FALSE} |
201 pct_missing_values <- length(temp)/(length(logvalues)+length(temp)) * 100 | 228 pct_missing_values <- length(temp) / (length(logvalues) + length(temp)) * 100 |
202 ``` | 229 ``` |
203 | 230 |
204 <!-- | 231 <!-- |
205 First few rows of data before imputation: | 232 First few rows of data before imputation: |
206 --> | 233 --> |
234 ```{r echo = FALSE, results = 'asis'} | |
235 cat("\\newpage\n") | |
236 ``` | |
237 | |
238 ## Parse sample names | |
239 | |
240 Parse the names of the samples to deduce the factor level for each sample: | |
241 | |
242 ```{r echo = FALSE} | |
243 | |
244 # prep for trt-median based imputation | |
245 | |
246 # Assuming that regex_sample_names <- "\\.(\\d+)[A-Z]$" | |
247 # get factors -> | |
248 # group runs (samples) by ignoring terminal [A-Z] in sample names | |
249 | |
250 m <- regexpr(regex_sample_names, names(quant_data), perl = TRUE) | |
251 temp_matches <- regmatches(names(quant_data), m) | |
252 print("Extracted sample names") | |
253 print(temp_matches) | |
254 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) | |
255 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) | |
256 print("Factor levels") | |
257 print(sample_factor_levels) | |
258 | |
259 ``` | |
207 ## Impute missing values | 260 ## Impute missing values |
208 ```{r echo = FALSE} | 261 |
209 | 262 ```{r echo = FALSE} |
210 #ACE start segment: trt-median based imputation | 263 |
211 # prep for trt-median based imputation | |
212 | |
213 # Assuming that regexSampleNames <- "\\.(\\d+)[A-Z]$" | |
214 # get factors -> group runs (samples) by ignoring terminal [A-Z] in sample names | |
215 # regexpr(pattern, text, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) | |
216 m <- regexpr(regexSampleNames, names(quantData), perl=TRUE) | |
217 tempMatches <- regmatches(names(quantData), m) | |
218 print("Extracted sample names") | |
219 print(tempMatches) | |
220 m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE) | |
221 sampleNumbers <- as.factor(regmatches(tempMatches, m2)) | |
222 print("Factor levels") | |
223 print(sampleNumbers) | |
224 | |
225 ``` | |
226 ```{r echo = FALSE} | |
227 | |
228 #ACE hack begin | |
229 #Determine number of cells to impute | 264 #Determine number of cells to impute |
230 cat( | 265 cat("Before imputation,", |
231 sprintf("Before imputation, there are:\n %d peptides\n %d missing values (%2.0f%s)" | 266 sprintf( |
232 , sum(rep.int(TRUE, nrow(quantData))) | 267 "there are:\n %d peptides\n %d missing values (%2.0f%s)", |
233 , sum(is.na(quantData)) | 268 sum(rep.int(TRUE, nrow(quant_data))), |
234 , pct_missing_values | 269 sum(is.na(quant_data)), |
235 , "%" | 270 pct_missing_values, |
236 ) | 271 "%" |
237 ) | 272 ) |
238 #ACE hack end | 273 ) |
239 | 274 |
240 ``` | 275 ``` |
241 ```{r echo = FALSE} | 276 ```{r echo = FALSE} |
242 | 277 |
243 #Impute data | 278 #Impute data |
244 quantDataImputed <- quantData | 279 quant_data_imp <- quant_data |
245 | 280 |
246 # Identify which values are missing and need to be imputed | 281 # Identify which values are missing and need to be imputed |
247 ind <- which(is.na(quantDataImputed), arr.ind=TRUE) | 282 ind <- which(is.na(quant_data_imp), arr.ind = TRUE) |
248 | 283 |
249 ``` | 284 ``` |
250 ```{r echo = FALSE} | 285 ```{r echo = FALSE} |
251 | 286 |
252 # Apply imputation | 287 # Apply imputation |
253 switch( | 288 switch( |
254 imputationMethod | 289 imputation_method |
255 , "group-median"={ | 290 , "group-median" = { |
256 cat("Imputation method: substitute missing value with median peptide-intensity for sample-group\n") | 291 cat("Imputation method:\n substitute missing value", |
257 #goodRows <- rep.int(TRUE, nrow(quantDataImputed)) | 292 "with median peptide-intensity for sample-group\n") |
258 sampleLevelIntegers <- as.integer(sampleNumbers) | 293 |
259 for (i in 1:length(levels(sampleNumbers))) { | 294 sample_level_integers <- as.integer(sample_factor_levels) |
260 levelCols <- i == sampleLevelIntegers | 295 for (i in seq_len(length(levels(sample_factor_levels)))) { |
261 ind <- which(is.na(quantDataImputed[,levelCols]), arr.ind=TRUE) | 296 level_cols <- i == sample_level_integers |
262 quantDataImputed[ind,levelCols] <- apply(quantDataImputed[,levelCols], 1, median, na.rm=T)[ind[,1]] | 297 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) |
298 quant_data_imp[ind, level_cols] <- | |
299 apply(quant_data_imp[, level_cols], 1, median, na.rm = T)[ind[, 1]] | |
263 } | 300 } |
264 goodRows <- !is.na(rowMeans(quantDataImputed)) | 301 good_rows <- !is.na(rowMeans(quant_data_imp)) |
265 } | 302 } |
266 , "median"={ | 303 , "median" = { |
267 cat("Imputation method: substitute missing value with median peptide-intensity across all sample classes\n") | 304 cat("Imputation method:\n substitute missing value with", |
268 quantDataImputed[ind] <- apply(quantDataImputed, 1, median, na.rm=T)[ind[,1]] | 305 "median peptide-intensity across all sample classes\n") |
269 goodRows <- !is.na(rowMeans(quantDataImputed)) | 306 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] |
307 good_rows <- !is.na(rowMeans(quant_data_imp)) | |
270 } | 308 } |
271 , "mean"={ | 309 , "mean" = { |
272 cat("Imputation method: substitute missing value with mean peptide-intensity across all sample classes\n") | 310 cat("Imputation method:\n substitute missing value with", |
273 quantDataImputed[ind] <- apply(quantDataImputed, 1, mean, na.rm=T)[ind[,1]] | 311 "mean peptide-intensity across all sample classes\n") |
274 goodRows <- !is.na(rowMeans(quantDataImputed)) | 312 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] |
313 good_rows <- !is.na(rowMeans(quant_data_imp)) | |
275 } | 314 } |
276 , "random"={ | 315 , "random" = { |
277 cat( | 316 cat( |
317 "Imputation method:\n substitute missing value with\n ", | |
278 sprintf( | 318 sprintf( |
279 "Imputation method: substitute missing value with random intensity N ~ (%0.2f, %0.2f)\n" | 319 "random intensity N ~ (%0.2f, %0.2f)\n" |
280 , q1, m1 | 320 , q1, m1 |
281 ) | 321 ) |
282 ) | 322 ) |
283 quantDataImputed[is.na(quantDataImputed)] <- 10^rnorm(NoToImpute, mean= q1, sd = m1) | 323 quant_data_imp[is.na(quant_data_imp)] <- |
284 goodRows <- !is.na(rowMeans(quantDataImputed)) | 324 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) |
325 good_rows <- !is.na(rowMeans(quant_data_imp)) | |
285 } | 326 } |
286 ) | 327 ) |
287 | 328 |
288 ``` | 329 ``` |
289 ```{r echo = FALSE} | 330 ```{r echo = FALSE} |
290 | 331 |
291 #Determine number of cells to impute | 332 #Determine number of cells to impute |
292 temp <- quantDataImputed[is.na(quantDataImputed)] | 333 temp <- quant_data_imp[is.na(quant_data_imp)] |
293 cat( | 334 cat("After imputation, there are:", |
294 sprintf( | 335 sprintf( |
295 "After imputation, there are:\n %d missing values\n %d usable peptides\n %d peptides with too many missing values for further analysis" | 336 "\n %d missing values\n %d usable peptides analysis" |
296 , sum(is.na(quantDataImputed[goodRows,])) | 337 , sum(is.na(quant_data_imp[good_rows, ])) |
297 , sum(goodRows) | 338 , sum(good_rows) |
298 , sum(!goodRows) | 339 ), |
340 sprintf( | |
341 "\n %d peptides with too many missing values for further analysis" | |
342 , sum(!good_rows) | |
299 ) | 343 ) |
300 ) | 344 ) |
301 ``` | 345 ``` |
302 ```{r echo = FALSE} | 346 ```{r echo = FALSE} |
303 | 347 |
304 | 348 |
305 # Zap rows where imputation was ineffective | 349 # Zap rows where imputation was ineffective |
306 fullData <- fullData [goodRows, ] | 350 full_data <- full_data [good_rows, ] |
307 quantData <- quantData [goodRows, ] | 351 quant_data <- quant_data [good_rows, ] |
308 quantDataImputed <- quantDataImputed[goodRows, ] | 352 quant_data_imp <- quant_data_imp[good_rows, ] |
309 | 353 |
310 ``` | 354 ``` |
311 ```{r echo = FALSE} | 355 ```{r echo = FALSE} |
312 | 356 |
313 d_combined <- (density(as.numeric(as.matrix(log10(quantDataImputed))))) | 357 d_combined <- (density(as.numeric(as.matrix( |
314 d_original <- density(as.numeric(as.matrix(log10(quantDataImputed[!is.na(quantData)])))) | 358 log10(quant_data_imp) |
315 | 359 )))) |
316 ``` | 360 d_original <- |
317 ```{r echo = FALSE} | 361 density(as.numeric(as.matrix( |
318 | 362 log10(quant_data_imp[!is.na(quant_data)])))) |
319 if (sum(is.na(quantData)) > 0) { | 363 |
364 ``` | |
365 ```{r echo = FALSE} | |
366 | |
367 if (sum(is.na(quant_data)) > 0) { | |
320 # There ARE missing values | 368 # There ARE missing values |
321 d_imputed <- (density(as.numeric(as.matrix(log10(quantDataImputed[is.na(quantData)]))))) | 369 d_imputed <- |
370 (density(as.numeric(as.matrix( | |
371 log10(quant_data_imp[is.na(quant_data)]) | |
372 )))) | |
322 } else { | 373 } else { |
323 # There are NO missing values | 374 # There are NO missing values |
324 d_imputed <- d_combined | 375 d_imputed <- d_combined |
325 } | 376 } |
326 | 377 |
327 ``` | 378 ``` |
328 | 379 |
329 <!-- ```{r echo = FALSE, fig.cap = "Blue = Data before imputation; Red = Imputed data"} --> | 380 ```{r echo = FALSE, fig.dim = c(9, 5)} |
330 ```{r echo = FALSE, fig.dim=c(9,5)} | |
331 ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y)) | 381 ylim <- c(0, max(d_combined$y, d_original$y, d_imputed$y)) |
332 plot( | 382 plot( |
333 d_combined | 383 d_combined, |
334 , ylim = ylim | 384 ylim = ylim, |
335 , sub = "Blue = data before imputation; Red = imputed data" | 385 sub = "Blue = data before imputation; Red = imputed data", |
336 , main = "Density vs. log10(intensity) before and after imputation" | 386 main = "Density vs. log10(intensity) before and after imputation" |
337 ) | 387 ) |
338 lines(d_original, col="blue") | 388 lines(d_original, col = "blue") |
339 lines(d_imputed, col="red") | 389 lines(d_imputed, col = "red") |
340 ``` | 390 ``` |
341 | 391 |
342 ## Perform Quantile Normalization | 392 ## Perform Quantile Normalization |
343 ```{r echo=FALSE} | 393 |
344 library(preprocessCore) | 394 <!-- |
345 # Apply quantile normalization using preprocessCore::normalize.quantiles | 395 # Apply quantile normalization using preprocessCore::normalize.quantiles |
346 # --- | 396 # --- |
347 # tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html | 397 # tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html |
348 # except this: https://support.bioconductor.org/p/122925/#9135989 | 398 # except this: https://support.bioconductor.org/p/122925/#9135989 |
349 # says to install it like this: | 399 # says to install it like this: |
350 # ``` | 400 # ``` |
351 # BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE,lib=.libPaths()[1]) | 401 # BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE, lib=.libPaths()[1]) |
352 # ``` | 402 # ``` |
353 # conda installation (necessary because of a bug in recent openblas): | 403 # conda installation (necessary because of a bug in recent openblas): |
354 # conda install bioconductor-preprocesscore openblas=0.3.3 | 404 # conda install bioconductor-preprocesscore openblas=0.3.3 |
355 # ... | 405 # ... |
356 # --- | 406 # --- |
358 # | 408 # |
359 # Description: | 409 # Description: |
360 # Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities. | 410 # Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities. |
361 # | 411 # |
362 # Usage: | 412 # Usage: |
363 # normalize.quantiles(x,copy=TRUE, keep.names=FALSE) | 413 # normalize.quantiles(x, copy = TRUE, keep.names = FALSE) |
364 # | 414 # |
365 # Arguments: | 415 # Arguments: |
366 # | 416 # |
367 # - x: A matrix of intensities where each column corresponds to a chip and each row is a probe. | 417 # - x: A matrix of intensities where each column corresponds to a chip and each row is a probe. |
368 # | 418 # |
395 # - Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) A Comparison of | 445 # - Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) A Comparison of |
396 # Normalization Methods for High Density Oligonucleotide Array Data Based on Bias | 446 # Normalization Methods for High Density Oligonucleotide Array Data Based on Bias |
397 # and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185 | 447 # and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185 |
398 # http://bmbolstad.com/misc/normalize/normalize.html | 448 # http://bmbolstad.com/misc/normalize/normalize.html |
399 # ... | 449 # ... |
450 --> | |
451 ```{r echo = FALSE} | |
452 library(preprocessCore) | |
400 | 453 |
401 if (TRUE) { | 454 if (TRUE) { |
402 quantDataImputed.qn <- normalize.quantiles(as.matrix(quantDataImputed)) | 455 quant_data_imp_qn <- normalize.quantiles(as.matrix(quant_data_imp)) |
403 } else { | 456 } else { |
404 quantDataImputed.qn <- as.matrix(quantDataImputed) | 457 quant_data_imp_qn <- as.matrix(quant_data_imp) |
405 } | 458 } |
406 | 459 |
407 quantDataImputed.qn = as.data.frame(quantDataImputed.qn) | 460 quant_data_imp_qn <- as.data.frame(quant_data_imp_qn) |
408 names(quantDataImputed.qn) = names(quantDataImputed) | 461 names(quant_data_imp_qn) <- names(quant_data_imp) |
409 quantDataImputed_QN_log <- log10(quantDataImputed.qn) | 462 quant_data_imp_qn_log <- log10(quant_data_imp_qn) |
410 | 463 |
411 rownames(quantDataImputed_QN_log) <- fullData[,1] | 464 rownames(quant_data_imp_qn_log) <- full_data[, 1] |
412 | 465 |
413 quantDataImputed.qn.LS = t(scale(t(log10(quantDataImputed.qn)))) | 466 quant_data_imp_qn_ls <- t(scale(t(log10(quant_data_imp_qn)))) |
414 anyNaN <- function (x) { | 467 any_nan <- function(x) { |
415 !any(x == "NaN") | 468 !any(x == "NaN") |
416 } | 469 } |
417 sel = apply(quantDataImputed.qn.LS, 1, anyNaN) | 470 sel <- apply(quant_data_imp_qn_ls, 1, any_nan) |
418 quantDataImputed.qn.LS2 <- quantDataImputed.qn.LS[which(sel),] | 471 quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls[which(sel), ] |
419 quantDataImputed.qn.LS2 = as.data.frame(quantDataImputed.qn.LS2) | 472 quant_data_imp_qn_ls2 <- as.data.frame(quant_data_imp_qn_ls2) |
420 | 473 |
421 #output quantile normalized data | 474 #output quantile normalized data |
422 dataTableImputed_QN_LT <- cbind(fullData[1:9], quantDataImputed_QN_log) | 475 data_table_imp_qn_lt <- cbind(full_data[1:9], quant_data_imp_qn_log) |
423 write.table(dataTableImputed_QN_LT, file = paste(paste(strsplit(imputedDataFilename, ".txt"),"QN_LT",sep="_"),".txt",sep=""), sep = "\t", col.names=TRUE, row.names=FALSE) | 476 write.table( |
477 data_table_imp_qn_lt, | |
478 file = paste(paste( | |
479 strsplit(imputed_data_filename, ".txt"), "QN_LT", sep = "_" | |
480 ), ".txt", sep = ""), | |
481 sep = "\t", | |
482 col.names = TRUE, | |
483 row.names = FALSE | |
484 ) | |
424 | 485 |
425 ``` | 486 ``` |
426 | 487 |
427 <!-- ACE insertion begin --> | 488 <!-- ACE insertion begin --> |
428 ### Checking that normalized, imputed, log-transformed sample distributions are similar: | 489 ### Checking that normalized, imputed, log-transformed sample distributions are similar: |
429 | 490 |
430 ```{r echo=FALSE} | 491 ```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'} |
431 #library(data.table) | 492 |
432 | 493 |
433 #Save unimputed quantDataLog for plotting below | 494 # Save unimputed quant_data_log for plotting below |
434 unimputedQuantDataLog <- quantDataLog | 495 unimputed_quant_data_log <- quant_data_log |
435 | 496 |
436 #Log10 transform (after preparing for zero values, which should never happen...) | 497 # log10 transform (after preparing for zero values, |
437 quantDataImputed.qn[quantDataImputed.qn == 0] <- .000000001 | 498 # which should never happen...) |
438 quantDataLog <- log10(quantDataImputed.qn) | 499 quant_data_imp_qn[quant_data_imp_qn == 0] <- .000000001 |
439 | 500 quant_data_log <- log10(quant_data_imp_qn) |
440 summary(quantDataLog) | 501 |
441 | 502 # Output quantile-normalized log-transformed dataset |
442 #Output quantile-normalized log-transformed dataset with imputed, normalized data | 503 # with imputed, normalized data |
443 | 504 |
444 dataTableImputed <- cbind(fullData[1:9], quantDataLog) | 505 data_table_imputed <- cbind(full_data[1:9], quant_data_log) |
445 write.table( | 506 write.table( |
446 dataTableImputed | 507 data_table_imputed |
447 , file=imputedDataFilename | 508 , file = imputed_data_filename |
448 , sep="\t" | 509 , sep = "\t" |
449 , col.names=TRUE | 510 , col.names = TRUE |
450 , row.names=FALSE | 511 , row.names = FALSE |
451 , quote=FALSE | 512 , quote = FALSE |
452 ) | 513 ) |
453 | 514 |
454 | 515 |
455 | 516 |
456 #data visualization | 517 # data visualization |
457 old_par <- par( | 518 old_par <- par( |
458 mai=par("mai") + c(0.5,0,0,0) | 519 mai = par("mai") + c(0.5, 0, 0, 0) |
459 , oma=par("oma") + c(0.5,0,0,0) | 520 , oma = par("oma") + c(0.5, 0, 0, 0) |
460 ) | 521 ) |
461 boxplot( | 522 boxplot( |
462 quantDataLog | 523 quant_data_log |
463 , las=2 | 524 , las = 2 |
464 ) | 525 ) |
465 par(old_par) | 526 par(old_par) |
466 ``` | 527 |
467 | 528 |
468 ```{r echo=FALSE, fig.dim=c(9,5)} | 529 |
469 quantDataLog_stack <- stack(quantDataLog) | 530 cat("\\newline\n") |
470 ggplot(quantDataLog_stack, aes(x=values)) + geom_density(aes(group=ind, colour=ind)) | 531 cat("\\newline\n") |
532 | |
533 ``` | |
534 | |
535 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 4)} | |
536 quant_data_log_stack <- stack(quant_data_log) | |
537 ggplot( | |
538 quant_data_log_stack, | |
539 aes(x = values) | |
540 ) + geom_density(aes(group = ind, colour = ind)) | |
471 ``` | 541 ``` |
472 | 542 |
473 ## Perform ANOVA filters | 543 ## Perform ANOVA filters |
474 | 544 |
475 ```{r,echo=FALSE} | 545 (see following pages) |
476 #Make new data frame containing only Phosphopeptides to connect preANOVA to ANOVA (connect_df) | 546 |
547 ```{r, echo = FALSE} | |
548 # Make new data frame containing only Phosphopeptides | |
549 # to connect preANOVA to ANOVA (connect_df) | |
477 connect_df <- data.frame( | 550 connect_df <- data.frame( |
478 dataTableImputed_QN_LT$Phosphopeptide | 551 data_table_imp_qn_lt$Phosphopeptide |
479 , dataTableImputed_QN_LT[,firstDataColumn] | 552 , data_table_imp_qn_lt[, first_data_column] |
480 ) | 553 ) |
481 colnames(connect_df) <- c("Phosphopeptide","Intensity") | 554 colnames(connect_df) <- c("Phosphopeptide", "Intensity") |
482 ``` | 555 ``` |
483 | 556 |
484 ```{r echo=FALSE, fig.dim=c(9,10)} | 557 ```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'} |
485 # Get factors -> group replicates (as indicated by terminal letter) by the preceding digits | 558 # Get factors -> group replicates (as indicated by terminal letter) |
486 # For example, group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc.. | 559 # by the preceding digits; |
487 m <- regexpr(regexSampleNames, names(quantDataImputed_QN_log), perl=TRUE) | 560 # e.g., group .1A .1B .1C into group 1; .2A .2B .2C, into group 2; etc.. |
488 #ACE str(m) | 561 m <- |
489 tempMatches <- regmatches(names(quantDataImputed_QN_log), m) | 562 regexpr(regex_sample_names, names(quant_data_imp_qn_log), perl = TRUE) |
490 #ACE str(tempMatches) | 563 |
491 numSamples <- length(tempMatches) | 564 temp_matches <- regmatches(names(quant_data_imp_qn_log), m) |
492 #ACE str(numSamples) | 565 |
493 m2 <- regexpr(regexSampleGrouping, tempMatches, perl=TRUE) | 566 number_of_samples <- length(temp_matches) |
494 #ACE str(m2) | 567 |
495 #ACE str(regmatches(tempMatches, m2)) | 568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) |
496 sampleNumbers <- as.factor(regmatches(tempMatches, m2)) | 569 |
497 #ACE str(sampleNumbers) | 570 |
498 | 571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) |
499 if (length(levels(sampleNumbers))<2) { | 572 |
500 cat("ERROR!!!! Cannot perform ANOVA analysis because it requires two or more factor levels\n") | 573 |
574 if (length(levels(sample_factor_levels)) < 2) { | |
575 cat( | |
576 "ERROR!!!! Cannot perform ANOVA analysis", | |
577 "because it requires two or more factor levels\n" | |
578 ) | |
501 cat("Unparsed sample names are:\n") | 579 cat("Unparsed sample names are:\n") |
502 print(names(quantDataImputed_QN_log)) | 580 print(names(quant_data_imp_qn_log)) |
503 cat(sprintf("Parsing rule for SampleNames is '%s'\n", regexSampleNames)) | 581 cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names)) |
504 cat("Parsed names are:\n") | 582 cat("Parsed names are:\n") |
505 print(tempMatches) | 583 print(temp_matches) |
506 cat(sprintf("Parsing rule for SampleGrouping is '%s'\n", regexSampleGrouping)) | 584 cat(sprintf( |
585 "Parsing rule for SampleGrouping is '%s'\n", | |
586 regex_sample_grouping | |
587 )) | |
507 cat("Sample group assignments are:\n") | 588 cat("Sample group assignments are:\n") |
508 print(regmatches(tempMatches, m2)) | 589 print(regmatches(temp_matches, m2)) |
509 } else { | 590 } else { |
510 pValueData.anovaPs <- apply(quantDataImputed_QN_log, 1, anovaFunc, groupingFactor=sampleNumbers) | 591 p_value_data_anova_ps <- |
511 | 592 apply( |
512 pValueData.anovaPs.FDR <- p.adjust(pValueData.anovaPs, method="fdr") | 593 quant_data_imp_qn_log, |
513 pValueData <- data.frame( | 594 1, |
514 phosphopeptide = fullData[,1] | 595 anova_func, |
515 , rawANOVAp = pValueData.anovaPs | 596 grouping_factor = sample_factor_levels |
516 , FDRadjustedANOVAp = pValueData.anovaPs.FDR | 597 ) |
598 | |
599 p_value_data_anova_ps_fdr <- | |
600 p.adjust(p_value_data_anova_ps, method = "fdr") | |
601 p_value_data <- data.frame( | |
602 phosphopeptide = full_data[, 1] | |
603 , | |
604 raw_anova_p = p_value_data_anova_ps | |
605 , | |
606 fdr_adjusted_anova_p = p_value_data_anova_ps_fdr | |
517 ) | 607 ) |
518 #ACE rownames(pValueData) <- fullData[,1] | 608 |
519 # output ANOVA file to constructed filename, | 609 # output ANOVA file to constructed filename, |
520 # e.g. "Outputfile_pST_ANOVA_STEP5.txt" | 610 # e.g. "Outputfile_pST_ANOVA_STEP5.txt" |
521 # becomes "Outpufile_pST_ANOVA_STEP5_FDR0.05.txt" | 611 # becomes "Outpufile_pST_ANOVA_STEP5_FDR0.05.txt" |
522 | 612 |
523 #Re-output quantile-normalized log-transformed dataset with imputed, normalized data to include p-values | 613 # Re-output quantile-normalized log-transformed dataset |
524 | 614 # with imputed, normalized data to include p-values |
525 dataTableImputed <- cbind(fullData[1:9], pValueData[,2:3], quantDataLog) | 615 |
616 data_table_imputed <- | |
617 cbind(full_data[1:9], p_value_data[, 2:3], quant_data_log) | |
526 write.table( | 618 write.table( |
527 dataTableImputed | 619 data_table_imputed, |
528 , file=imputedDataFilename | 620 file = imputed_data_filename, |
529 , sep="\t" | 621 sep = "\t", |
530 , col.names=TRUE | 622 col.names = TRUE, |
531 , row.names=FALSE | 623 row.names = FALSE, |
532 , quote=FALSE | 624 quote = FALSE |
533 ) | 625 ) |
534 | 626 |
535 | 627 |
536 pValueData <- pValueData[order(pValueData$FDRadjustedANOVAp),] | 628 p_value_data <- |
537 | 629 p_value_data[order(p_value_data$fdr_adjusted_anova_p), ] |
538 cutoff <- valFDR[1] | 630 |
539 for (cutoff in valFDR){ #loop through FDR cutoffs | 631 cutoff <- val_fdr[1] |
540 | 632 for (cutoff in val_fdr) { |
541 filtered_p <- pValueData[which(pValueData$FDRadjustedANOVAp < cutoff),, drop = FALSE] | 633 #loop through FDR cutoffs |
542 filteredData.filtered <- quantDataImputed_QN_log[rownames(filtered_p),, drop = FALSE] | 634 |
543 filteredData.filtered <- filteredData.filtered[order(filtered_p$FDRadjustedANOVAp),, drop = FALSE] | 635 filtered_p <- |
636 p_value_data[ | |
637 which(p_value_data$fdr_adjusted_anova_p < cutoff), | |
638 , | |
639 drop = FALSE | |
640 ] | |
641 filtered_data_filtered <- | |
642 quant_data_imp_qn_log[ | |
643 rownames(filtered_p), | |
644 , | |
645 drop = FALSE | |
646 ] | |
647 filtered_data_filtered <- | |
648 filtered_data_filtered[ | |
649 order(filtered_p$fdr_adjusted_anova_p), | |
650 , | |
651 drop = FALSE | |
652 ] | |
544 | 653 |
545 # <!-- ACE insertion start --> | 654 # <!-- ACE insertion start --> |
546 old_oma <- par("oma") | 655 old_oma <- par("oma") |
547 old_par <- par( | 656 old_par <- par( |
548 mai=(par("mai") + c(0.7,0,0,0)) * c(1,1,0.3,1) | 657 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), |
549 , oma=old_oma * c(1,1,0.3,1) | 658 oma = old_oma * c(1, 1, 0.3, 1), |
550 , cex.main=0.9 | 659 cex.main = 0.9, |
551 , cex.axis=0.7 | 660 cex.axis = 0.7 |
552 ) | 661 ) |
553 | 662 |
554 if (nrow(filteredData.filtered) > 0) { | 663 cat("\\newpage\n") |
664 if (nrow(filtered_data_filtered) > 0) { | |
665 cat(sprintf( | |
666 "Intensities for peptides whose adjusted p-value < %0.2f\n", | |
667 cutoff | |
668 )) | |
669 cat("\\newline\n") | |
670 cat("\\newline\n") | |
671 | |
555 boxplot( | 672 boxplot( |
556 filteredData.filtered | 673 filtered_data_filtered, |
557 , main = sprintf("Imputed, normalized intensities where adjusted p-value < %0.2f", cutoff) | 674 main = "Imputed, normalized intensities", # no line plot |
558 # no line plot , main = "" | 675 las = 2, |
559 , las = 2 | 676 ylab = expression(log[10](intensity)) |
560 # , ylim = c(5.5,10) | |
561 , ylab = expression(log[10](intensity)) | |
562 ) | 677 ) |
563 } else { | 678 } else { |
564 cat(sprintf("No peptides were found to have cutoff adjusted p-value < %0.2f\n", cutoff)) | 679 cat(sprintf( |
680 "No peptides were found to have cutoff adjusted p-value < %0.2f\n", | |
681 cutoff | |
682 )) | |
565 } | 683 } |
566 par(old_par) | 684 par(old_par) |
567 | 685 |
568 #Add Phosphopeptide column to ANOVA filtered table | 686 if (nrow(filtered_data_filtered) > 0) { |
569 ANOVA.filtered_merge <- merge( | 687 #Add Phosphopeptide column to anova_filtered table |
688 anova_filtered_merge <- merge( | |
570 x = connect_df | 689 x = connect_df |
571 , y = filteredData.filtered | 690 , |
572 , by.x="Intensity" | 691 y = filtered_data_filtered |
573 , by.y=1 | 692 , |
693 by.x = "Intensity" | |
694 , | |
695 by.y = 1 | |
574 ) | 696 ) |
575 ANOVA.filtered_merge.order <- rownames(filtered_p) | 697 anova_filtered_merge_order <- rownames(filtered_p) |
576 | 698 |
577 ANOVA.filtered_merge.format <- sapply( | 699 anova_filtered_merge_format <- sapply( |
578 X = filtered_p$FDRadjustedANOVAp | 700 X = filtered_p$fdr_adjusted_anova_p |
579 , FUN = function(x) { | 701 , |
580 if (x > 0.0001) | 702 FUN = function(x) { |
581 paste0("(%0.",1+ceiling(-log10(x)),"f) %s") | 703 if (x > 0.0001) |
582 else | 704 paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s") |
583 paste0("(%0.4e) %s") | 705 else |
706 paste0("(%0.4e) %s") | |
584 } | 707 } |
585 ) | |
586 | |
587 #ANOVA.filtered_merge.format <- paste0("(%0.",1+ceiling(-log10(filtered_p$FDRadjustedANOVAp)),"f) %s") | |
588 | |
589 ANOVA.filtered <- data.table( | |
590 ANOVA.filtered_merge$Phosphopeptide | |
591 , ANOVA.filtered_merge$Intensity | |
592 , ANOVA.filtered_merge[, 2:numSamples+1] | |
593 ) | 708 ) |
594 colnames(ANOVA.filtered) <- c("Phosphopeptide", colnames(filteredData.filtered)) | 709 |
595 | 710 |
596 # merge qualitative columns into the ANOVA data | 711 |
597 output_table <- data.frame(ANOVA.filtered$Phosphopeptide) | 712 anova_filtered <- data.table( |
598 output_table <- merge( | 713 anova_filtered_merge$Phosphopeptide |
714 , | |
715 anova_filtered_merge$Intensity | |
716 , | |
717 anova_filtered_merge[, 2:number_of_samples + 1] | |
718 ) | |
719 colnames(anova_filtered) <- | |
720 c("Phosphopeptide", colnames(filtered_data_filtered)) | |
721 | |
722 # merge qualitative columns into the ANOVA data | |
723 output_table <- data.frame(anova_filtered$Phosphopeptide) | |
724 output_table <- merge( | |
599 x = output_table | 725 x = output_table |
600 , y = dataTableImputed_QN_LT | 726 , |
601 , by.x = "ANOVA.filtered.Phosphopeptide" | 727 y = data_table_imp_qn_lt |
602 , by.y="Phosphopeptide" | 728 , |
729 by.x = "anova_filtered.Phosphopeptide" | |
730 , | |
731 by.y = "Phosphopeptide" | |
603 ) | 732 ) |
604 | 733 |
605 #Produce heatmap to visualize significance and the effect of imputation | 734 #Produce heatmap to visualize significance and the effect of imputation |
606 m <- as.matrix(unimputedQuantDataLog[ANOVA.filtered_merge.order,]) | 735 m <- |
607 if (nrow(m) > 0) { | 736 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) |
608 rownames_m <- rownames(m) | 737 if (nrow(m) > 0) { |
609 rownames(m) <- sapply( | 738 rownames_m <- rownames(m) |
610 X = 1:nrow(m) | 739 rownames(m) <- sapply( |
611 , FUN = function(i) { | 740 X = seq_len(nrow(m)) |
741 , | |
742 FUN = function(i) { | |
612 sprintf( | 743 sprintf( |
613 ANOVA.filtered_merge.format[i] | 744 anova_filtered_merge_format[i] |
614 , filtered_p$FDRadjustedANOVAp[i] | 745 , |
615 , rownames_m[i] | 746 filtered_p$fdr_adjusted_anova_p[i] |
747 , | |
748 rownames_m[i] | |
616 ) | 749 ) |
617 } | 750 } |
618 ) | 751 ) |
619 margins <- c( | 752 margins <- c(max(nchar(colnames(m))) * 10 / 16 # col |
620 max(nchar(colnames(m))) * 10 / 16 # col | 753 , max(nchar(rownames(m))) * 5 / 16 # row |
621 , max(nchar(rownames(m))) * 5 / 16 # row | 754 ) |
622 ) | 755 how_many_peptides <- min(50, nrow(m)) |
623 how_many_peptides <- min(50, nrow(m)) | 756 |
624 | 757 cat("\\newpage\n") |
625 op <- par("cex.main") | 758 if (nrow(m) > 50) { |
626 try( | 759 cat("Heatmap for the 50 most-significant peptides", |
627 if (nrow(m) > 1) { | 760 sprintf( |
628 par(cex.main=0.6) | 761 "whose adjusted p-value < %0.2f\n", |
629 heatmap( | 762 cutoff) |
630 m[how_many_peptides:1,] | 763 ) |
631 , Rowv = NA | 764 } else { |
632 , Colv = NA | 765 cat("Heatmap for peptides whose", |
633 , cexRow = 0.7 | 766 sprintf("adjusted p-value < %0.2f\n", |
634 , cexCol = 0.8 | 767 cutoff) |
635 , scale="row" | 768 ) |
636 , margins = margins | 769 } |
637 , main = "Heatmap of unimputed, unnormalized intensities" | 770 cat("\\newline\n") |
638 , xlab = "" | 771 cat("\\newline\n") |
639 # , main = bquote( | 772 op <- par("cex.main") |
640 # .( how_many_peptides ) | 773 try( |
641 # ~ " peptides with adjusted p-value <" | 774 if (nrow(m) > 1) { |
642 # ~ .(sprintf("%0.2f", cutoff)) | 775 par(cex.main = 0.6) |
643 # ) | 776 heatmap( |
644 ) | 777 m[how_many_peptides:1, ], |
645 } | 778 Rowv = NA, |
646 ) | 779 Colv = NA, |
647 #ACE fig_dim knitr::opts_chunk$set(fig.dim = fig_dim) | 780 cexRow = 0.7, |
648 par(op) | 781 cexCol = 0.8, |
782 scale = "row", | |
783 margins = margins, | |
784 main = | |
785 "Heatmap of unimputed, unnormalized intensities", | |
786 xlab = "" | |
787 ) | |
788 } | |
789 ) | |
790 par(op) | |
791 } | |
649 } | 792 } |
650 | |
651 } | 793 } |
652 } | 794 } |
653 ``` | 795 ``` |
654 | 796 |
797 <!-- | |
655 ## Peptide IDs, etc. | 798 ## Peptide IDs, etc. |
656 | 799 |
657 See output files. | 800 See output files. |
801 --> |