Mercurial > repos > galaxyp > quantp
comparison quantp.r @ 1:dec87511835e draft
planemo upload commit 1887dff812162880d66b003a927867cd5000c98f
| author | galaxyp |
|---|---|
| date | Thu, 20 Dec 2018 16:05:27 -0500 |
| parents | f1db758949f4 |
| children | 1b11db35d7b5 |
comparison
equal
deleted
inserted
replaced
| 0:f1db758949f4 | 1:dec87511835e |
|---|---|
| 14 tempdf[is.na(tempdf)] = 0; | 14 tempdf[is.na(tempdf)] = 0; |
| 15 tempdf$Group = tempgrp; | 15 tempdf$Group = tempgrp; |
| 16 png(outfile, width = 6, height = 6, units = 'in', res=300); | 16 png(outfile, width = 6, height = 6, units = 'in', res=300); |
| 17 # bitmap(outfile, "png16m"); | 17 # bitmap(outfile, "png16m"); |
| 18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3); | 18 g = autoplot(prcomp(select(tempdf, -Group)), data = tempdf, colour = 'Group', size=3); |
| 19 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outplot))) | |
| 19 plot(g); | 20 plot(g); |
| 20 dev.off(); | 21 dev.off(); |
| 21 } | 22 } |
| 22 | 23 |
| 23 #=============================================================================== | 24 #=============================================================================== |
| 28 rownames(PE_TE_data) = PE_TE_data$PE_ID; | 29 rownames(PE_TE_data) = PE_TE_data$PE_ID; |
| 29 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data); | 30 regmodel = lm(PE_abundance~TE_abundance, data=PE_TE_data); |
| 30 regmodel_summary = summary(regmodel); | 31 regmodel_summary = summary(regmodel); |
| 31 | 32 |
| 32 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n", | 33 cat("<font><h3>Linear Regression model fit between Proteome and Transcriptome data</h3></font>\n", |
| 33 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n", | 34 "<p>Assuming a linear relationship between Proteome and Transcriptome data, we here fit a linear regression model.</p>\n", |
| 34 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', | 35 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', |
| 35 file = htmloutfile, append = TRUE); | 36 file = htmloutfile, append = TRUE); |
| 36 | 37 |
| 37 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n", | 38 cat("<tr><td>Formula</td><td>","PE_abundance~TE_abundance","</td></tr>\n", |
| 38 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n", | 39 "<tr><td colspan='2' align='center'> <b>Coefficients</b></td>","</tr>\n", |
| 39 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n", | 40 "<tr><td>",names(regmodel$coefficients[1]),"</td><td>",regmodel$coefficients[1]," (Pvalue:", regmodel_summary$coefficients[1,4],")","</td></tr>\n", |
| 40 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n", | 41 "<tr><td>",names(regmodel$coefficients[2]),"</td><td>",regmodel$coefficients[2]," (Pvalue:", regmodel_summary$coefficients[2,4],")","</td></tr>\n", |
| 41 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n", | 42 "<tr><td colspan='2' align='center'> <b>Model parameters</b></td>","</tr>\n", |
| 42 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n", | 43 "<tr><td>Residual standard error</td><td>",regmodel_summary$sigma," (",regmodel_summary$df[2]," degree of freedom)</td></tr>\n", |
| 43 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n", | 44 "<tr><td>F-statistic</td><td>",regmodel_summary$fstatistic[1]," ( on ",regmodel_summary$fstatistic[2]," and ",regmodel_summary$fstatistic[3]," degree of freedom)</td></tr>\n", |
| 44 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n", | 45 "<tr><td>R-squared</td><td>",regmodel_summary$r.squared,"</td></tr>\n", |
| 45 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n", | 46 "<tr><td>Adjusted R-squared</td><td>",regmodel_summary$adj.r.squared,"</td></tr>\n", |
| 46 file = htmloutfile, append = TRUE); | 47 file = htmloutfile, append = TRUE); |
| 47 | 48 |
| 48 cat("</table>\n", file = htmloutfile, append = TRUE); | 49 cat("</table>\n", file = htmloutfile, append = TRUE); |
| 49 | 50 |
| 50 cat( | 51 cat( |
| 51 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n", | 52 "<font color='#ff0000'><h3>Regression and diagnostics plots</h3></font>\n", |
| 56 # bitmap(outplot, "png16m"); | 57 # bitmap(outplot, "png16m"); |
| 57 par(mfrow=c(1,1)); | 58 par(mfrow=c(1,1)); |
| 58 plot(regmodel, 1, cex.lab=1.5); | 59 plot(regmodel, 1, cex.lab=1.5); |
| 59 dev.off(); | 60 dev.off(); |
| 60 | 61 |
| 62 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[1]] + | |
| 63 geom_point(aes(text=sprintf("Residual: %.2f<br>Fitted value: %.2f<br>Gene: %s", .fitted, .resid, PE_TE_data$PE_ID)), | |
| 64 shape = 1, size = .1, stroke = .2) + | |
| 65 theme_light()) | |
| 66 saveWidget(ggplotly(g, tooltip= c("text")), file.path(gsub("\\.png", "\\.html", outplot))) | |
| 67 | |
| 61 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); | 68 outplot = paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""); |
| 62 png(outplot,width = 10, height = 10, units = 'in', res=300); | 69 png(outplot,width = 10, height = 10, units = 'in', res=300); |
| 63 # bitmap(outplot, "png16m"); | 70 # bitmap(outplot, "png16m"); |
| 64 par(mfrow=c(1,1)); | 71 par(mfrow=c(1,1)); |
| 65 plot(regmodel, 2, cex.lab=1.5); | 72 g <- plot(regmodel, 2, cex.lab=1.5); |
| 73 ggplotly(g) | |
| 66 dev.off(); | 74 dev.off(); |
| 75 | |
| 76 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[2]] + | |
| 77 geom_point(aes(text=sprintf("Standarized residual: %.2f<br>Theoretical quantile: %.2f<br>Gene: %s", .qqx, .qqy, PE_TE_data$PE_ID)), | |
| 78 shape = 1, size = .1) + | |
| 79 theme_light()) | |
| 80 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) | |
| 81 | |
| 67 | 82 |
| 68 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); | 83 outplot = paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""); |
| 69 png(outplot, width = 10, height = 10, units = 'in',res=300); | 84 png(outplot, width = 10, height = 10, units = 'in',res=300); |
| 70 # bitmap(outplot, "png16m"); | 85 # bitmap(outplot, "png16m"); |
| 71 par(mfrow=c(1,1)); | 86 par(mfrow=c(1,1)); |
| 72 plot(regmodel, 5, cex.lab=1.5); | 87 plot(regmodel, 5, cex.lab=1.5); |
| 73 dev.off(); | 88 dev.off(); |
| 74 | 89 |
| 90 cd_cont_pos <- function(leverage, level, model) {sqrt(level*length(coef(model))*(1-leverage)/leverage)} | |
| 91 cd_cont_neg <- function(leverage, level, model) {-cd_cont_pos(leverage, level, model)} | |
| 92 | |
| 93 suppressWarnings(g <- autoplot(regmodel, label = FALSE)[[4]] + | |
| 94 aes(label = PE_TE_data$PE_ID) + | |
| 95 geom_point(aes(text=sprintf("Leverage: %.2f<br>Standardized residual: %.2f<br>Gene: %s", .hat, .stdresid, PE_TE_data$PE_ID))) + | |
| 96 theme_light()) | |
| 97 saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outplot))) | |
| 98 | |
| 75 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); | 99 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); |
| 76 | 100 |
| 77 cat( | 101 cat( |
| 78 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n", | 102 '<tr bgcolor="#7a0019"><th>', "<font color='#ffcc33'><h4>1) <u>Residuals vs Fitted plot</h4></font></u></th>\n", |
| 79 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n', | 103 '<th><font color=#ffcc33><h4>2) <u>Normal Q-Q plot of residuals</h4></font></u></th></tr>\n', |
| 80 file = htmloutfile, append = TRUE); | 104 file = htmloutfile, append = TRUE); |
| 81 | 105 |
| 82 cat( | 106 cat( |
| 83 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600></td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600></td></tr>\n', | 107 '<tr><td align=center><img src="PE_TE_lm_1.png" width=600 height=600>', |
| 84 file = htmloutfile, append = TRUE); | 108 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$widget_div), |
| 109 '</td><td align=center><img src="PE_TE_lm_2.png" width=600 height=600>', | |
| 110 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$widget_div), | |
| 111 '</td></tr>\n', file = htmloutfile, append = TRUE); | |
| 85 | 112 |
| 86 cat( | 113 cat( |
| 87 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n', | 114 '<tr><td align=center>This plot checks for linear relationship assumptions.<br>If a horizontal line is observed without any distinct patterns, it indicates a linear relationship.</td>\n', |
| 88 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n', | 115 '<td align=center>This plot checks whether residuals are normally distributed or not.<br>It is good if the residuals points follow the straight dashed line i.e., do not deviate much from dashed line.</td></tr></table>\n', |
| 89 file = htmloutfile, append = TRUE); | 116 file = htmloutfile, append = TRUE); |
| 90 | 117 |
| 91 | 118 |
| 92 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 119 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| 93 # Residuals data | 120 # Residuals data |
| 94 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 121 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| 95 res_all = regmodel$residuals; | 122 res_all = regmodel$residuals; |
| 96 res_mean = mean(res_all); | 123 res_mean = mean(res_all); |
| 97 res_sd = sd(res_all); | 124 res_sd = sd(res_all); |
| 98 res_diff = (res_all-res_mean); | 125 res_diff = (res_all-res_mean); |
| 99 res_zscore = res_diff/res_sd; | 126 res_zscore = res_diff/res_sd; |
| 100 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] | 127 # res_outliers = res_all[which((res_zscore > 2)|(res_zscore < -2))] |
| 101 | 128 |
| 102 | 129 |
| 103 tempind = which((res_zscore > 2)|(res_zscore < -2)); | 130 tempind = which((res_zscore > 2)|(res_zscore < -2)); |
| 104 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; | 131 res_PE_TE_data_no_outlier = PE_TE_data[-tempind,]; |
| 105 res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; | 132 res_PE_TE_data_no_outlier$residuals = res_all[-tempind]; |
| 106 res_PE_TE_data_outlier = PE_TE_data[tempind,]; | 133 res_PE_TE_data_outlier = PE_TE_data[tempind,]; |
| 107 res_PE_TE_data_outlier$residuals = res_all[tempind]; | 134 res_PE_TE_data_outlier$residuals = res_all[tempind]; |
| 108 | 135 |
| 109 # Save the complete table for download (influential_observations) | 136 # Save the complete table for download (influential_observations) |
| 110 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) | 137 temp_outlier_data = data.frame(res_PE_TE_data_outlier$PE_ID, res_PE_TE_data_outlier$TE_abundance, res_PE_TE_data_outlier$PE_abundance, res_PE_TE_data_outlier$residuals) |
| 111 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") | 138 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") |
| 112 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); | 139 outdatafile = paste(outdir,"/PE_TE_outliers_residuals.txt", sep="", collapse=""); |
| 113 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 140 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
| 114 | 141 |
| 115 | 142 |
| 116 # Save the complete table for download (non influential_observations) | 143 # Save the complete table for download (non influential_observations) |
| 117 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) | 144 temp_all_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, res_all) |
| 118 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") | 145 colnames(temp_all_data) = c("Gene", "Transcript abundance", "Protein abundance", "Residual value") |
| 119 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); | 146 outdatafile = paste(outdir,"/PE_TE_abundance_residuals.txt", sep="", collapse=""); |
| 120 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 147 write.table(temp_all_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
| 121 | 148 |
| 122 | 149 |
| 123 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n', | 150 cat('<br><h2 id="inf_obs"><font color=#ff0000>Outliers based on the residuals from regression analysis</font></h2>\n', |
| 124 file = htmloutfile, append = TRUE); | 151 file = htmloutfile, append = TRUE); |
| 125 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | 152 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', |
| 126 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n', | 153 '<tr bgcolor="#7a0019"><th colspan=2><font color=#ffcc33>Residuals from Regression</font></th></tr>\n', |
| 127 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', | 154 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', |
| 128 file = htmloutfile, append = TRUE); | 155 file = htmloutfile, append = TRUE); |
| 129 | 156 |
| 130 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n", | 157 cat("<tr><td>Mean Residual value</td><td>",res_mean,"</td></tr>\n", |
| 131 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n", | 158 "<tr><td>Standard deviation (Residuals)</td><td>",res_sd,"</td></tr>\n", |
| 132 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href=PE_TE_outliers_residuals.txt target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n', | 159 '<tr><td>Total outliers (Residual value > 2 standard deviation from the mean)</td><td>',length(tempind),' <font size=4>(<b><a href="PE_TE_outliers_residuals.txt" target="_blank">Download these ',length(tempind),' data points with high residual values here</a></b>)</font></td>\n', |
| 133 '<tr><td colspan=2 align=center><font size=4>(<b><a href=PE_TE_abundance_residuals.txt target="_blank">Download the complete residuals data here</a></b>)</font></td></td>\n', | 160 '<tr><td colspan=2 align=center>', |
| 134 "</table><br><br>\n", | 161 '<font size=4>(<b><a href="PE_TE_abundance_residuals.txt" target="_blank">Download the complete residuals data here</a></b>)</font>', |
| 135 file = htmloutfile, append = TRUE); | 162 "</td></tr>\n</table><br><br>\n", |
| 136 | 163 file = htmloutfile, append = TRUE); |
| 164 | |
| 137 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 165 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| 138 | 166 |
| 139 | 167 |
| 140 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); | 168 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">', file = htmloutfile, append = TRUE); |
| 141 | 169 |
| 142 cat( | 170 cat( |
| 143 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n', | 171 '<tr bgcolor="#7a0019"><th><font color=#ffcc33><h4>3) <u>Residuals vs Leverage plot</h4></font></u></th></tr>\n', |
| 144 file = htmloutfile, append = TRUE); | 172 file = htmloutfile, append = TRUE); |
| 145 | 173 |
| 146 cat( | 174 cat( |
| 147 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600></td></tr>\n', | 175 '<tr><td align=center><img src="PE_TE_lm_5.png" width=600 height=600>', |
| 176 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$widget_div) | |
| 177 , '</td></tr>\n', | |
| 148 file = htmloutfile, append = TRUE); | 178 file = htmloutfile, append = TRUE); |
| 149 | 179 |
| 150 cat( | 180 cat( |
| 151 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n', | 181 '<tr><td align=center>This plot is useful to identify any influential cases, that is outliers or extreme values.<br>They might influence the regression results upon inclusion or exclusion from the analysis.</td></tr></table><br>\n', |
| 152 file = htmloutfile, append = TRUE); | 182 file = htmloutfile, append = TRUE); |
| 155 | 185 |
| 156 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | 186 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 157 # Cook's Distance | 187 # Cook's Distance |
| 158 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | 188 #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| 159 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n', | 189 cat('<hr/><h2 id="inf_obs"><font color=#ff0000>INFLUENTIAL OBSERVATIONS</font></h2>\n', |
| 160 file = htmloutfile, append = TRUE); | 190 file = htmloutfile, append = TRUE); |
| 161 cat( | 191 cat( |
| 162 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n', | 192 '<p><b>Cook\'s distance</b> computes the influence of each data point/observation on the predicted outcome. i.e. this measures how much the observation is influencing the fitted values.<br>In general use, those observations that have a <b>Cook\'s distance > than ', cookdist_upper_cutoff,' times the mean</b> may be classified as <b>influential.</b></p>\n', |
| 163 file = htmloutfile, append = TRUE); | 193 file = htmloutfile, append = TRUE); |
| 164 | 194 |
| 165 cooksd <- cooks.distance(regmodel); | 195 cooksd <- cooks.distance(regmodel); |
| 175 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black") | 205 points(sel_nonoutlier, cooksd[sel_nonoutlier],pch="*", cex=2, cex.lab=1.5, col="black") |
| 176 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line | 206 abline(h = as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T), col="red") # add cutoff line |
| 177 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels | 207 #text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T),names(cooksd),""), col="red", pos=2) # add labels |
| 178 dev.off(); | 208 dev.off(); |
| 179 | 209 |
| 210 cooksd_df <- data.frame(cooksd) | |
| 211 cooksd_df$genes <- row.names(cooksd_df) | |
| 212 cooksd_df$index <- 1:nrow(cooksd_df) | |
| 213 cooksd_df$colors <- "black" | |
| 214 cutoff <- as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T) | |
| 215 cooksd_df[cooksd_df$cooksd > cutoff,]$colors <- "red" | |
| 216 | |
| 217 g <- ggplot(cooksd_df, aes(x = index, y = cooksd, label = row.names(cooksd_df), color=as.factor(colors), | |
| 218 text=sprintf("Gene: %s<br>Cook's Distance: %.3f", row.names(cooksd_df), cooksd))) + | |
| 219 ggtitle("Influential Obs. by Cook's distance") + xlab("Observations") + ylab("Cook's Distance") + | |
| 220 #xlim(0, 3000) + ylim(0, .15) + | |
| 221 scale_shape_discrete(solid=F) + | |
| 222 geom_point(size = 2, shape = 8) + | |
| 223 geom_hline(yintercept = cutoff, | |
| 224 linetype = "dashed", color = "red") + | |
| 225 scale_color_manual(values = c("black" = "black", "red" = "red")) + | |
| 226 theme_light() + theme(legend.position="none") | |
| 227 saveWidget(ggplotly(g, tooltip= "text"), file.path(gsub("\\.png", "\\.html", outplot))) | |
| 228 | |
| 180 cat( | 229 cat( |
| 181 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>', | 230 '<img src="PE_TE_lm_cooksd.png" width=800 height=800>', |
| 231 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), | |
| 182 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>', | 232 '<br>In the above plot, observations above red line (',cookdist_upper_cutoff,' * mean Cook\'s distance) are influential. Genes that are outliers could be important. These observations influences the correlation values and regression coefficients<br><br>', |
| 183 file = htmloutfile, append = TRUE); | 233 file = htmloutfile, append = TRUE); |
| 184 | 234 |
| 185 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)); | 235 tempind = which(cooksd>as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T)); |
| 186 PE_TE_data_no_outlier = PE_TE_data[-tempind,]; | 236 PE_TE_data_no_outlier = PE_TE_data[-tempind,]; |
| 187 PE_TE_data_no_outlier$cooksd = cooksd[-tempind]; | 237 PE_TE_data_no_outlier$cooksd = cooksd[-tempind]; |
| 188 PE_TE_data_outlier = PE_TE_data[tempind,]; | 238 PE_TE_data_outlier = PE_TE_data[tempind,]; |
| 189 PE_TE_data_outlier$cooksd = cooksd[tempind]; | 239 PE_TE_data_outlier$cooksd = cooksd[tempind]; |
| 193 cat( | 243 cat( |
| 194 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', | 244 '<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Value</font></th></tr>\n', |
| 195 file = htmloutfile, append = TRUE); | 245 file = htmloutfile, append = TRUE); |
| 196 | 246 |
| 197 # Save the complete table for download (influential_observations) | 247 # Save the complete table for download (influential_observations) |
| 198 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) | 248 temp_outlier_data = data.frame(PE_TE_data_outlier$PE_ID, PE_TE_data_outlier$TE_abundance, PE_TE_data_outlier$PE_abundance, PE_TE_data_outlier$cooksd) |
| 199 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") | 249 colnames(temp_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") |
| 200 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); | 250 outdatafile = paste(outdir,"/PE_TE_influential_observation.txt", sep="", collapse=""); |
| 201 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 251 write.table(temp_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
| 202 | 252 |
| 203 | 253 |
| 204 # Save the complete table for download (non influential_observations) | 254 # Save the complete table for download (non influential_observations) |
| 205 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) | 255 temp_no_outlier_data = data.frame(PE_TE_data_no_outlier$PE_ID, PE_TE_data_no_outlier$TE_abundance, PE_TE_data_no_outlier$PE_abundance, PE_TE_data_no_outlier$cooksd) |
| 206 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") | 256 colnames(temp_no_outlier_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cook's distance") |
| 207 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); | 257 outdatafile = paste(outdir,"/PE_TE_non_influential_observation.txt", sep="", collapse=""); |
| 208 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); | 258 write.table(temp_no_outlier_data, file=outdatafile, row.names=F, sep="\t", quote=F); |
| 209 | 259 |
| 210 | 260 |
| 211 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n", | 261 cat("<tr><td>Mean Cook\'s distance</td><td>",mean(cooksd, na.rm=T),"</td></tr>\n", |
| 212 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n", | 262 "<tr><td>Total influential observations (Cook\'s distance > ",cookdist_upper_cutoff," * mean Cook\'s distance)</td><td>",length(tempind),"</td>\n", |
| 213 | 263 |
| 214 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n", | 264 "<tr><td>Observations with Cook\'s distance < ",cookdist_upper_cutoff," * mean Cook\'s distance</td><td>",length(which(cooksd<as.numeric(cookdist_upper_cutoff)*mean(cooksd, na.rm=T))),"</td>\n", |
| 215 "</table><br><br>\n", | 265 "</table><br><br>\n", |
| 216 file = htmloutfile, append = TRUE); | 266 file = htmloutfile, append = TRUE); |
| 217 | 267 |
| 218 | 268 |
| 219 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 269 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| 220 # Scatter plot after removal of influential points | 270 # Scatter plot after removal of influential points |
| 221 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 271 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ |
| 222 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); | 272 outplot = paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""); |
| 223 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 273 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
| 224 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 274 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
| 225 png(outplot, width = 10, height = 10, units = 'in', res=300); | 275 png(outplot, width = 10, height = 10, units = 'in', res=300); |
| 226 # bitmap(outplot,"png16m"); | 276 # bitmap(outplot,"png16m"); |
| 227 g = ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); | 277 suppressWarnings(g <- ggplot(PE_TE_data_no_outlier, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + |
| 228 suppressMessages(plot(g)); | 278 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + |
| 229 dev.off(); | 279 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + |
| 230 | 280 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f", |
| 231 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE); | 281 PE_ID, TE_abundance, PE_abundance)))) |
| 232 # Before | 282 suppressMessages(plot(g)) |
| 233 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->", '<img src="TE_PE_scatter.png" width=600 height=600></td>\n', file = htmloutfile, append = TRUE); | 283 suppressMessages(saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot)))) |
| 234 | 284 dev.off(); |
| 235 # After | 285 |
| 236 cat("<td align=center>\n", | 286 |
| 237 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600></td></tr>\n', | 287 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatterplot: Before removal</font></th><th><font color=#ffcc33>Scatterplot: After removal</font></th></tr>\n', file = htmloutfile, append = TRUE); |
| 238 file = htmloutfile, append = TRUE); | 288 # Before |
| 239 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | 289 cat("<tr><td align=center><!--<font color='#ff0000'><h3>Scatter plot between Proteome and Transcriptome Abundance</h3></font>\n-->", |
| 240 | 290 '<img src="TE_PE_scatter.png" width=600 height=600>', |
| 291 gsub('id="html', 'id="secondhtml"', | |
| 292 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$widget_div)), | |
| 293 '</td>\n', | |
| 294 file = htmloutfile, append = TRUE); | |
| 295 | |
| 296 # After | |
| 297 cat("<td align=center>\n", | |
| 298 '<img src="AbundancePlot_scatter_without_outliers.png" width=600 height=600>', | |
| 299 gsub("width:500px;height:500px", "width:600px;height:600px", extractWidgetCode(outplot)$widget_div), | |
| 300 | |
| 301 '</td></tr>\n', | |
| 302 file = htmloutfile, append = TRUE); | |
| 303 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
| 304 | |
| 241 | 305 |
| 242 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson"); | 306 cor_result_pearson = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "pearson"); |
| 243 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman"); | 307 cor_result_spearman = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "spearman"); |
| 244 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall"); | 308 cor_result_kendall = cor.test(PE_TE_data_no_outlier[,"TE_abundance"], PE_TE_data_no_outlier[,"PE_abundance"], method = "kendall"); |
| 245 | 309 |
| 247 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); | 311 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); |
| 248 cat('</td>\n', file = htmloutfile, append=TRUE); | 312 cat('</td>\n', file = htmloutfile, append=TRUE); |
| 249 | 313 |
| 250 | 314 |
| 251 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n', | 315 cat('<td><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Parameter</font></th><th><font color=#ffcc33>Method 1</font></th><th><font color=#ffcc33>Method 2</font></th><th><font color=#ffcc33>Method 3</font></th></tr>\n', |
| 252 file = htmloutfile, append = TRUE); | 316 file = htmloutfile, append = TRUE); |
| 253 | 317 |
| 254 cat( | 318 cat( |
| 255 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n", | 319 "<tr><td>Correlation method</td><td>",cor_result_pearson$method,"</td><td>",cor_result_spearman$method,"</td><td>",cor_result_kendall$method,"</td></tr>\n", |
| 256 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n", | 320 "<tr><td>Correlation coefficient</td><td>",cor_result_pearson$estimate,"</td><td>",cor_result_spearman$estimate,"</td><td>",cor_result_kendall$estimate,"</td></tr>\n", |
| 257 file = htmloutfile, append = TRUE) | 321 file = htmloutfile, append = TRUE) |
| 265 }else{ | 329 }else{ |
| 266 tab_n_row = 10; | 330 tab_n_row = 10; |
| 267 } | 331 } |
| 268 | 332 |
| 269 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font> ", | 333 cat("<br><br><font size=5><b><a href='PE_TE_influential_observation.txt' target='_blank'>Download the complete list of influential observations</a></b></font> ", |
| 270 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n", | 334 "<font size=5><b><a href='PE_TE_non_influential_observation.txt' target='_blank'>Download the complete list (After removing influential points)</a></b></font><br>\n", |
| 271 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n', | 335 '<br><font color="brown"><h4>Top ',as.character(tab_n_row),' Influential observations (Cook\'s distance > ',cookdist_upper_cutoff,' * mean Cook\'s distance)</h4></font>\n', |
| 272 file = htmloutfile, append = TRUE); | 336 file = htmloutfile, append = TRUE); |
| 273 | 337 |
| 274 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE); | 338 cat('<table border=1 cellspacing=0 cellpadding=5> <tr bgcolor="#7a0019">\n', sep = "",file = htmloutfile, append = TRUE); |
| 275 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n", | 339 cat("<th><font color=#ffcc33>Gene</font></th><th><font color=#ffcc33>Protein Log Fold-Change</font></th><th><font color=#ffcc33>Transcript Log Fold-Change</font></th><th><font color=#ffcc33>Cook's Distance</font></th></tr>\n", |
| 276 file = htmloutfile, append = TRUE); | 340 file = htmloutfile, append = TRUE); |
| 277 | 341 |
| 278 | 342 |
| 279 for(i in 1:tab_n_row) | 343 for(i in 1:tab_n_row) |
| 280 { | 344 { |
| 281 cat( | 345 cat( |
| 283 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n', | 347 '<td>',format(PE_TE_data_outlier_sorted[i,2], scientific=F),'</td>\n', |
| 284 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n', | 348 '<td>',PE_TE_data_outlier_sorted[i,4],'</td>\n', |
| 285 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n', | 349 '<td>',format(PE_TE_data_outlier_sorted[i,5], scientific=F),'</td></tr>\n', |
| 286 file = htmloutfile, append = TRUE); | 350 file = htmloutfile, append = TRUE); |
| 287 } | 351 } |
| 288 cat('</table><br><br>\n',file = htmloutfile, append = TRUE); | 352 cat('</table><br><br>\n',file = htmloutfile, append = TRUE); |
| 289 | 353 |
| 290 | 354 |
| 291 } | 355 } |
| 292 | 356 |
| 293 | 357 |
| 294 | 358 |
| 295 #=============================================================================== | 359 #=============================================================================== |
| 296 # Heatmap | 360 # Heatmap |
| 297 #=============================================================================== | 361 #=============================================================================== |
| 298 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ | 362 singlesample_heatmap=function(PE_TE_data, htmloutfile, hm_nclust){ |
| 299 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n', | 363 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Heatmap of PE and TE abundance values (Hierarchical clustering)</font></th><th><font color=#ffcc33>Number of clusters to extract: ',hm_nclust,'</font></th></tr>\n', |
| 300 file = htmloutfile, append = TRUE); | 364 file = htmloutfile, append = TRUE); |
| 301 | 365 |
| 302 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]))) | 366 hc=hclust(dist(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]))) |
| 303 hm_cluster = cutree(hc,k=hm_nclust); | 367 hm_cluster = cutree(hc,k=hm_nclust); |
| 304 | 368 |
| 305 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); | 369 outplot = paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); |
| 306 png(outplot, width = 10, height = 10, units = 'in', res=300); | 370 png(outplot, width = 10, height = 10, units = 'in', res=300); |
| 307 # bitmap(outplot, "png16m"); | 371 # bitmap(outplot, "png16m"); |
| 308 par(mfrow=c(1,1)); | 372 par(mfrow=c(1,1)); |
| 309 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), trace="none", cexCol=1, col=greenred(100),Colv=F, labCol=c("Proteins","Transcripts"), scale="col", hclustfun = hclust, distfun = dist); | 373 hmap = heatmap.2(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), |
| 374 trace="none", cexCol=1, col=greenred(100),Colv=F, | |
| 375 labCol=c("Proteins","Transcripts"), scale="col", | |
| 376 hclustfun = hclust, distfun = dist); | |
| 377 | |
| 310 dev.off(); | 378 dev.off(); |
| 311 | 379 |
| 312 cat('<tr><td align=center colspan="2"><img src="PE_TE_heatmap.png" width=800 height=800></td></tr>\n', | 380 |
| 313 file = htmloutfile, append = TRUE); | 381 p <- d3heatmap(as.matrix(PE_TE_data[,c("PE_abundance","TE_abundance")]), scale = "col", |
| 382 dendrogram = "row", colors = greenred(100), | |
| 383 hclustfun = hclust, distfun = dist, | |
| 384 show_grid = FALSE) | |
| 385 saveWidget(p, file.path(gsub("\\.png", "\\.html", outplot))) | |
| 386 | |
| 387 cat('<tr><td align=center colspan="2">', | |
| 388 '<img src="PE_TE_heatmap.png" width=800 height=800>', | |
| 389 gsub("width:960px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), | |
| 390 '</td></tr>\n', | |
| 391 file = htmloutfile, append = TRUE); | |
| 314 | 392 |
| 315 | 393 |
| 316 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster); | 394 temp_PE_TE_data = data.frame(PE_TE_data$PE_ID, PE_TE_data$TE_abundance, PE_TE_data$PE_abundance, hm_cluster); |
| 317 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)") | 395 colnames(temp_PE_TE_data) = c("Gene", "Transcript abundance", "Protein abundance", "Cluster (Hierarchical clustering)") |
| 318 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse=""); | 396 tempoutfile = paste(outdir,"/PE_TE_hc_clusterpoints.txt",sep="",collapse=""); |
| 319 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") | 397 write.table(temp_PE_TE_data, file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") |
| 320 | 398 |
| 321 | 399 |
| 322 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n', | 400 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_hc_clusterpoints.txt" target="_blank"><b>Download the hierarchical cluster list</b></a></font></td></tr></table>\n', |
| 323 file = htmloutfile, append = TRUE); | 401 file = htmloutfile, append = TRUE); |
| 324 } | 402 } |
| 325 | 403 |
| 326 | 404 |
| 327 #=============================================================================== | 405 #=============================================================================== |
| 328 # K-means clustering | 406 # K-means clustering |
| 335 # bitmap(outplot, "png16m"); | 413 # bitmap(outplot, "png16m"); |
| 336 par(mfrow=c(1,1)); | 414 par(mfrow=c(1,1)); |
| 337 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); | 415 scatter.smooth(PE_TE_data_kdata[,"TE_abundance"], PE_TE_data_kdata[,"PE_abundance"], xlab="Transcript Abundance", ylab="Protein Abundance", cex.lab=1.5); |
| 338 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8) | 416 legend(1, 95, legend=c("Cluster 1", "Line 2"), col="red", lty=1:1, cex=0.8) |
| 339 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8) | 417 legend(1, 95, legend="Cluster 2", col="green", lty=1:1, cex=0.8) |
| 340 | |
| 341 | 418 |
| 342 ind=which(k1$cluster==1); | 419 ind=which(k1$cluster==1); |
| 343 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16); | 420 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="red", pch=16); |
| 344 ind=which(k1$cluster==2); | 421 ind=which(k1$cluster==2); |
| 345 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16); | 422 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="green", pch=16); |
| 359 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16); | 436 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="yellow", pch=16); |
| 360 ind=which(k1$cluster==10); | 437 ind=which(k1$cluster==10); |
| 361 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); | 438 points(PE_TE_data_kdata[ind,"TE_abundance"], PE_TE_data_kdata[ind,"PE_abundance"], col="orange", pch=16); |
| 362 dev.off(); | 439 dev.off(); |
| 363 | 440 |
| 441 # Interactive plot for k-means clustering | |
| 442 g <- ggplot(PE_TE_data, aes(x = TE_abundance, y = PE_abundance, label = row.names(PE_TE_data), | |
| 443 text=sprintf("Gene: %s<br>Transcript Abundance: %.3f<br>Protein Abundance: %.3f", | |
| 444 PE_ID, TE_abundance, PE_abundance), | |
| 445 color=as.factor(k1$cluster))) + | |
| 446 xlab("Transcript Abundance") + ylab("Protein Abundance") + | |
| 447 scale_shape_discrete(solid=F) + geom_smooth(method = "loess", span = 2/3) + | |
| 448 geom_point(size = 1, shape = 8) + | |
| 449 theme_light() + theme(legend.position="none") | |
| 450 saveWidget(ggplotly(g, tooltip=c("text")), file.path(gsub("\\.png", "\\.html", outplot))) | |
| 451 | |
| 364 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n', | 452 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>K-mean clustering</font></th><th><font color=#ffcc33>Number of clusters: ',nclust,'</font></th></tr>\n', |
| 365 file = htmloutfile, append = TRUE); | 453 file = htmloutfile, append = TRUE); |
| 366 | 454 |
| 367 tempind = order(k1$cluster); | 455 tempind = order(k1$cluster); |
| 368 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse=""); | 456 tempoutfile = paste(outdir,"/PE_TE_kmeans_clusterpoints.txt",sep="",collapse=""); |
| 369 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") | 457 write.table(data.frame(PE_TE_data_kdata[tempind, ], Cluster=k1$cluster[tempind]), file=tempoutfile, row.names=F, quote=F, sep="\t", eol="\n") |
| 370 | 458 |
| 371 | 459 #paste(outdir,"/PE_TE_heatmap.png",sep="",collapse=""); |
| 372 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800></td></tr>\n', | 460 cat('<tr><td colspan="2" align=center><img src="PE_TE_kmeans.png" width=800 height=800>', |
| 373 file = htmloutfile, append = TRUE); | 461 gsub("width:500px;height:500px", "width:800px;height:800px", extractWidgetCode(outplot)$widget_div), '</td></tr>\n', |
| 462 file = htmloutfile, append = TRUE); | |
| 374 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n', | 463 cat('<tr><td colspan="2" align=center><font size=5><a href="PE_TE_kmeans_clusterpoints.txt" target="_blank"><b>Download the cluster list</b></a></font></td></tr></table><br><hr/>\n', |
| 375 file = htmloutfile, append = TRUE); | 464 file = htmloutfile, append = TRUE); |
| 376 | 465 |
| 377 } | 466 } |
| 378 | 467 |
| 379 #=============================================================================== | 468 #=============================================================================== |
| 380 # scatter plot | 469 # scatter plot |
| 383 { | 472 { |
| 384 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 473 min_lim = min(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
| 385 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); | 474 max_lim = max(c(PE_TE_data$PE_abundance,PE_TE_data$TE_abundance)); |
| 386 png(outfile, width = 10, height = 10, units = 'in', res=300); | 475 png(outfile, width = 10, height = 10, units = 'in', res=300); |
| 387 # bitmap(outfile, "png16m"); | 476 # bitmap(outfile, "png16m"); |
| 388 g = ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance))+geom_point() + geom_smooth() + xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + xlim(min_lim,max_lim) + ylim(min_lim,max_lim); | 477 suppressWarnings(g <- ggplot(PE_TE_data, aes(x=TE_abundance, y=PE_abundance, label=PE_ID)) + geom_smooth() + |
| 389 suppressMessages(plot(g)); | 478 xlab("Transcript abundance log fold-change") + ylab("Protein abundance log fold-change") + |
| 390 # plot(g); | 479 xlim(min_lim,max_lim) + ylim(min_lim,max_lim) + |
| 480 geom_point(aes(text=sprintf("Gene: %s<br>Transcript Abundance (log fold-change): %.3f<br>Protein Abundance (log fold-change): %.3f", | |
| 481 PE_ID, TE_abundance, PE_abundance)), | |
| 482 size = .5)) | |
| 483 suppressMessages(plot(g)) | |
| 484 suppressMessages(saveWidget(ggplotly(g, tooltip = "text"), file.path(gsub("\\.png", "\\.html", outfile)))) | |
| 391 dev.off(); | 485 dev.off(); |
| 392 } | 486 } |
| 393 | 487 |
| 394 #=============================================================================== | 488 #=============================================================================== |
| 395 # Correlation table | 489 # Correlation table |
| 420 tempdf = df[,-1, drop=FALSE]; | 514 tempdf = df[,-1, drop=FALSE]; |
| 421 tempdf = t(tempdf) %>% as.data.frame(); | 515 tempdf = t(tempdf) %>% as.data.frame(); |
| 422 tempdf[is.na(tempdf)] = 0; | 516 tempdf[is.na(tempdf)] = 0; |
| 423 tempdf$Sample = rownames(tempdf); | 517 tempdf$Sample = rownames(tempdf); |
| 424 tempdf1 = melt(tempdf, id.vars = "Sample"); | 518 tempdf1 = melt(tempdf, id.vars = "Sample"); |
| 519 | |
| 520 if("Gene" %in% colnames(df)){ | |
| 521 tempdf1$Name = df$Gene; | |
| 522 } else if ("Protein" %in% colnames(df)){ | |
| 523 tempdf1$Name = df$Protein; | |
| 524 } else if ("Genes" %in% colnames(df)){ | |
| 525 tempdf1$Name = df$Genes; | |
| 526 } | |
| 527 | |
| 425 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; | 528 tempdf1$Group = sampleinfo_df[tempdf1$Sample,2]; |
| 426 png(outplot, width = 6, height = 6, units = 'in', res=300); | 529 png(outplot, width = 6, height = 6, units = 'in', res=300); |
| 427 # bitmap(outplot, "png16m"); | 530 # bitmap(outplot, "png16m"); |
| 428 if(fill_leg=="Yes") | 531 if(fill_leg=="No"){ |
| 429 { | 532 tempdf1$Group = c("case", "control") |
| 430 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) | 533 } |
| 534 | |
| 535 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + | |
| 536 geom_boxplot()+ | |
| 537 labs(x=user_xlab) + labs(y=user_ylab) | |
| 538 saveWidget(ggplotly(g), file.path(gsub("\\.png", "\\.html", outfile))) | |
| 539 plot(g); | |
| 540 dev.off(); | |
| 541 } | |
| 542 | |
| 543 ## A wrapper to saveWidget which compensates for arguable BUG in | |
| 544 ## saveWidget which requires `file` to be in current working | |
| 545 ## directory. | |
| 546 saveWidget <- function (widget,file,...) { | |
| 547 wd<-getwd() | |
| 548 on.exit(setwd(wd)) | |
| 549 outDir<-dirname(file) | |
| 550 file<-basename(file) | |
| 551 setwd(outDir); | |
| 552 htmlwidgets::saveWidget(widget,file=file,selfcontained = FALSE) | |
| 553 } | |
| 554 | |
| 555 #=============================================================================== | |
| 556 # Mean or Median of Replicates | |
| 557 #=============================================================================== | |
| 558 | |
| 559 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) | |
| 560 { | |
| 561 grps = unique(sampleinfo_df[,2]); | |
| 562 | |
| 563 TE_df_merged <<- sapply(grps, function(x){ | |
| 564 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | |
| 565 if(length(tempsample)!=1){ | |
| 566 apply(TE_df[,tempsample],1,method); | |
| 567 }else{ | |
| 568 return(TE_df[,tempsample]); | |
| 569 } | |
| 570 }); | |
| 571 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); | |
| 572 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); | |
| 573 | |
| 574 PE_df_merged <<- sapply(grps, function(x){ | |
| 575 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | |
| 576 if(length(tempsample)!=1){ | |
| 577 apply(PE_df[,tempsample],1,method); | |
| 578 }else{ | |
| 579 return(PE_df[,tempsample]); | |
| 580 } | |
| 581 }); | |
| 582 | |
| 583 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); | |
| 584 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); | |
| 585 | |
| 586 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); | |
| 587 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); | |
| 588 | |
| 589 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); | |
| 590 } | |
| 591 | |
| 592 #=============================================================================== | |
| 593 # (T-Test or Wilcoxon ranksum test) and Volcano Plot | |
| 594 #=============================================================================== | |
| 595 | |
| 596 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with) | |
| 597 { | |
| 598 | |
| 599 PE_colnames = colnames(PE_df_data); | |
| 600 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; | |
| 601 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
| 602 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; | |
| 603 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
| 604 | |
| 605 if(method=="mean"){ | |
| 606 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
| 607 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
| 431 }else{ | 608 }else{ |
| 432 if(fill_leg=="No") | 609 if(method=="median"){ |
| 433 { | 610 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) |
| 434 tempdf1$Group = c("case", "control") | 611 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); |
| 435 g = ggplot(tempdf1, aes(x=Sample, y=value, fill=Group)) + geom_boxplot() + labs(x=user_xlab) + labs(y=user_ylab) | |
| 436 } | 612 } |
| 437 } | 613 } |
| 438 plot(g); | 614 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) |
| 439 dev.off(); | 615 |
| 440 } | 616 |
| 441 | 617 TE_colnames = colnames(TE_df_data); |
| 442 | 618 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; |
| 443 #=============================================================================== | 619 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); |
| 444 # Mean or Median of Replicates | 620 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; |
| 445 #=============================================================================== | 621 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); |
| 446 | 622 |
| 447 mergeReplicates = function(TE_df,PE_df, sampleinfo_df, method) | 623 if(method=="mean"){ |
| 448 { | 624 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); |
| 449 grps = unique(sampleinfo_df[,2]); | 625 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) |
| 450 | |
| 451 TE_df_merged <<- sapply(grps, function(x){ | |
| 452 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | |
| 453 if(length(tempsample)!=1){ | |
| 454 apply(TE_df[,tempsample],1,method); | |
| 455 }else{ | 626 }else{ |
| 456 return(TE_df[,tempsample]); | 627 if(method=="median"){ |
| 628 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
| 629 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
| 630 } | |
| 457 } | 631 } |
| 458 }); | 632 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) |
| 459 TE_df_merged <<- data.frame(as.character(TE_df[,1]), TE_df_merged); | 633 |
| 460 colnames(TE_df_merged) = c(colnames(TE_df)[1], grps); | 634 |
| 461 | 635 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); |
| 462 PE_df_merged <<- sapply(grps, function(x){ | 636 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); |
| 463 tempsample = sampleinfo_df[which(sampleinfo_df$Group==x),1] | 637 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); |
| 464 if(length(tempsample)!=1){ | 638 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); |
| 465 apply(PE_df[,tempsample],1,method); | 639 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n", |
| 640 file = htmloutfile, append = TRUE); | |
| 641 | |
| 642 if(length(condition_ind)!=1) | |
| 643 { | |
| 644 # Volcano Plot | |
| 645 | |
| 646 if(volc_with=="adj_pval") | |
| 647 { | |
| 648 PE_pval = PE_adj_pval | |
| 649 TE_pval = TE_adj_pval | |
| 650 volc_ylab = "-log10 Adjusted p-value"; | |
| 651 }else{ | |
| 652 if(volc_with=="pval") | |
| 653 { | |
| 654 volc_ylab = "-log10 p-value"; | |
| 655 } | |
| 656 } | |
| 657 outplot_PE = paste(outdir,"/PE_volcano.png",sep="",collapse=""); | |
| 658 png(outplot_PE, width = 10, height = 10, units = 'in', res=300); | |
| 659 # bitmap(outplot, "png16m"); | |
| 660 par(mfrow=c(1,1)); | |
| 661 | |
| 662 plot(PE_df_logfold$LogFold, -log10(PE_pval), | |
| 663 xlab="log2 fold change", ylab=volc_ylab, | |
| 664 type="n") | |
| 665 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
| 666 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") | |
| 667 PE_df_logfold$color <- "black" | |
| 668 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
| 669 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
| 670 sel1 <- which(PE_pval<=0.05) | |
| 671 sel=intersect(sel,sel1) | |
| 672 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red") | |
| 673 PE_df_logfold[sel,]$color <- "red" | |
| 674 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
| 675 sel1 <- which(PE_pval>0.05) | |
| 676 sel=intersect(sel,sel1) | |
| 677 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue") | |
| 678 PE_df_logfold[sel,]$color <- "blue" | |
| 679 abline(h = -log(0.05,base=10), col="red", lty=2) | |
| 680 abline(v = log(2,base=2), col="red", lty=2) | |
| 681 abline(v = log(0.5,base=2), col="red", lty=2) | |
| 682 dev.off(); | |
| 683 | |
| 684 g <- ggplot(PE_df_logfold, aes(x = LogFold, -log10(PE_pval), color = as.factor(color), | |
| 685 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f", | |
| 686 Genes, LogFold, -log10(PE_pval), PE_pval))) + | |
| 687 xlab("log2 fold change") + ylab("-log10 p-value") + | |
| 688 geom_point(shape=1, size = 1.5, stroke = .2) + | |
| 689 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + | |
| 690 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + | |
| 691 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + | |
| 692 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + | |
| 693 theme_light() + theme(legend.position="none") | |
| 694 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_PE))) | |
| 695 | |
| 696 outplot_TE = paste(outdir,"/TE_volcano.png",sep="",collapse=""); | |
| 697 png(outplot_TE, width = 10, height = 10, units = 'in', res=300); | |
| 698 # bitmap(outplot, "png16m"); | |
| 699 par(mfrow=c(1,1)); | |
| 700 | |
| 701 plot(TE_df_logfold$LogFold, -log10(TE_pval), | |
| 702 xlab="log2 fold change", ylab=volc_ylab, | |
| 703 type="n") | |
| 704 | |
| 705 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
| 706 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") | |
| 707 TE_df_logfold$color <- "black" | |
| 708 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
| 709 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
| 710 sel1 <- which(TE_pval<=0.05) | |
| 711 sel=intersect(sel,sel1) | |
| 712 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red") | |
| 713 TE_df_logfold[sel,]$color <- "red" | |
| 714 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
| 715 sel1 <- which(TE_pval>0.05) | |
| 716 sel=intersect(sel,sel1) | |
| 717 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue") | |
| 718 TE_df_logfold[sel,]$color <- "blue" | |
| 719 abline(h = -log(0.05,base=10), col="red", lty=2) | |
| 720 abline(v = log(2,base=2), col="red", lty=2) | |
| 721 abline(v = log(0.5,base=2), col="red", lty=2) | |
| 722 dev.off(); | |
| 723 | |
| 724 g <- ggplot(TE_df_logfold, aes(x = LogFold, -log10(TE_pval), color = as.factor(color), | |
| 725 text=sprintf("Gene: %s<br>Log2 Fold-Change: %.3f<br>-log10 p-value: %.3f<br>p-value: %.3f", | |
| 726 Genes, LogFold, -log10(TE_pval), TE_pval))) + | |
| 727 xlab("log2 fold change") + ylab("-log10 p-value") + | |
| 728 geom_point(shape=1, size = 1.5, stroke = .2) + | |
| 729 scale_color_manual(values = c("black" = "black", "red" = "red", "blue" = "blue")) + | |
| 730 geom_hline(yintercept = -log(0.05,base=10), linetype="dashed", color="red") + | |
| 731 geom_vline(xintercept = log(2,base=2), linetype="dashed", color="red") + | |
| 732 geom_vline(xintercept = log(0.5,base=2), linetype="dashed", color="red") + | |
| 733 theme_light() + theme(legend.position="none") | |
| 734 saveWidget(ggplotly(g, tooltip="text"), file.path(gsub("\\.png", "\\.html", outplot_TE))) | |
| 735 | |
| 736 | |
| 737 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE); | |
| 738 cat("<tr><td align=center>", | |
| 739 '<img src="TE_volcano.png" width=600 height=600>', | |
| 740 extractWidgetCode(outplot_TE)$widget_div, | |
| 741 '</td>\n', file = htmloutfile, append = TRUE); | |
| 742 cat("<td align=center>", | |
| 743 '<img src="PE_volcano.png" width=600 height=600>', | |
| 744 extractWidgetCode(outplot_PE)$widget_div, | |
| 745 '</td></tr></table><br>\n', | |
| 746 file = htmloutfile, append = TRUE); | |
| 747 | |
| 748 | |
| 466 }else{ | 749 }else{ |
| 467 return(PE_df[,tempsample]); | 750 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>', |
| 751 file = htmloutfile, append = TRUE); | |
| 468 } | 752 } |
| 469 }); | 753 |
| 470 | |
| 471 PE_df_merged <<- data.frame(as.character(PE_df[,1]), PE_df_merged); | |
| 472 colnames(PE_df_merged) = c(colnames(PE_df)[1], grps); | |
| 473 | |
| 474 #sampleinfo_df_merged = data.frame(Sample = grps, Group = grps, stringsAsFactors = F); | |
| 475 sampleinfo_df_merged = data.frame(Sample = grps, Group = "Group", stringsAsFactors = F); | |
| 476 | |
| 477 return(list(TE_df_merged = TE_df_merged, PE_df_merged = PE_df_merged, sampleinfo_df_merged = sampleinfo_df_merged)); | |
| 478 } | |
| 479 | |
| 480 #=============================================================================== | |
| 481 # (T-Test or Wilcoxon ranksum test) and Volcano Plot | |
| 482 #=============================================================================== | |
| 483 | |
| 484 perform_Test_Volcano = function(TE_df_data,PE_df_data,TE_df_logfold, PE_df_logfold,sampleinfo_df, method, correction_method,volc_with) | |
| 485 { | |
| 486 | |
| 487 PE_colnames = colnames(PE_df_data); | |
| 488 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; | |
| 489 control_ind <<- sapply(control_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
| 490 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; | |
| 491 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(PE_colnames==x); as.numeric(temp_ind)}); | |
| 492 | |
| 493 if(method=="mean"){ | |
| 494 #PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
| 495 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
| 496 }else{ | |
| 497 if(method=="median"){ | |
| 498 PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
| 499 # PE_pval = apply(PE_df_data[2:length(colnames(PE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
| 500 } | |
| 501 } | |
| 502 PE_adj_pval = p.adjust(PE_pval, method = correction_method, n = length(PE_pval)) | |
| 503 | |
| 504 | |
| 505 TE_colnames = colnames(TE_df_data); | |
| 506 control_sample = sampleinfo_df[which(sampleinfo_df$Group=="control"),1]; | |
| 507 control_ind <<- sapply(control_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); | |
| 508 condition_sample = sampleinfo_df[which(sampleinfo_df$Group=="case"),1]; | |
| 509 condition_ind <<- sapply(condition_sample, function(x){temp_ind = which(TE_colnames==x); as.numeric(temp_ind)}); | |
| 510 | |
| 511 if(method=="mean"){ | |
| 512 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) t.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
| 513 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(t.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
| 514 }else{ | |
| 515 if(method=="median"){ | |
| 516 TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) {obj<-try(wilcox.test(x[condition_ind-1], x[control_ind-1]),silent=TRUE); if(is(obj, "try-error")){return(NA)}else{return(obj$p.value)}}) | |
| 517 # TE_pval = apply(TE_df_data[2:length(colnames(TE_df_data))],1,function(x) wilcox.test(x[condition_ind-1], x[control_ind-1])$p.value); | |
| 518 } | |
| 519 } | |
| 520 TE_adj_pval = p.adjust(TE_pval, method = correction_method, n = length(TE_pval)) | |
| 521 | |
| 522 | |
| 523 PE_TE_logfold_pval = data.frame(TE_df_logfold$Gene, TE_df_logfold$LogFold, TE_pval, TE_adj_pval, PE_df_logfold$LogFold, PE_pval, PE_adj_pval); | |
| 524 colnames(PE_TE_logfold_pval) = c("Gene", "Transcript log fold-change", "p-value (transcript)", "adj p-value (transcript)", "Protein log fold-change", "p-value (protein)", "adj p-value (protein)"); | |
| 525 outdatafile = paste(outdir,"/PE_TE_logfold_pval.txt", sep="", collapse=""); | |
| 526 write.table(PE_TE_logfold_pval, file=outdatafile, row.names=F, sep="\t", quote=F); | |
| 527 cat("<br><br><font size=5><b><a href='PE_TE_logfold_pval.txt' target='_blank'>Download the complete fold change data here</a></b></font><br>\n", | |
| 528 file = htmloutfile, append = TRUE); | |
| 529 | |
| 530 if(length(condition_ind)!=1) | |
| 531 { | |
| 532 # Volcano Plot | |
| 533 | |
| 534 if(volc_with=="adj_pval") | |
| 535 { | |
| 536 PE_pval = PE_adj_pval | |
| 537 TE_pval = TE_adj_pval | |
| 538 volc_ylab = "-log10 Adjusted p-value"; | |
| 539 }else{ | |
| 540 if(volc_with=="pval") | |
| 541 { | |
| 542 volc_ylab = "-log10 p-value"; | |
| 543 } | |
| 544 } | |
| 545 outplot = paste(outdir,"/PE_volcano.png",sep="",collapse=""); | |
| 546 png(outplot, width = 10, height = 10, units = 'in', res=300); | |
| 547 # bitmap(outplot, "png16m"); | |
| 548 par(mfrow=c(1,1)); | |
| 549 | |
| 550 plot(PE_df_logfold$LogFold, -log10(PE_pval), | |
| 551 xlab="log2 fold change", ylab=volc_ylab, | |
| 552 type="n") | |
| 553 sel <- which((PE_df_logfold$LogFold<=log(2,base=2))&(PE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
| 554 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="black") | |
| 555 #sel <- which((PE_df_logfold$LogFold>log(2,base=2))&(PE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
| 556 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
| 557 sel1 <- which(PE_pval<=0.05) | |
| 558 sel=intersect(sel,sel1) | |
| 559 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="red") | |
| 560 sel <- which((PE_df_logfold$LogFold>log(2,base=2))|(PE_df_logfold$LogFold<log(0.5, base=2))) | |
| 561 sel1 <- which(PE_pval>0.05) | |
| 562 sel=intersect(sel,sel1) | |
| 563 points(PE_df_logfold[sel,"LogFold"], -log10(PE_pval[sel]),col="blue") | |
| 564 abline(h = -log(0.05,base=10), col="red", lty=2) | |
| 565 abline(v = log(2,base=2), col="red", lty=2) | |
| 566 abline(v = log(0.5,base=2), col="red", lty=2) | |
| 567 dev.off(); | |
| 568 | |
| 569 | |
| 570 | |
| 571 outplot = paste(outdir,"/TE_volcano.png",sep="",collapse=""); | |
| 572 png(outplot, width = 10, height = 10, units = 'in', res=300); | |
| 573 # bitmap(outplot, "png16m"); | |
| 574 par(mfrow=c(1,1)); | |
| 575 | |
| 576 plot(TE_df_logfold$LogFold, -log10(TE_pval), | |
| 577 xlab="log2 fold change", ylab=volc_ylab, | |
| 578 type="n") | |
| 579 sel <- which((TE_df_logfold$LogFold<=log(2,base=2))&(TE_df_logfold$LogFold>=log(0.5, base=2))) # or whatever you want to use | |
| 580 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="black") | |
| 581 #sel <- which((TE_df_logfold$LogFold>log(2,base=2))&(TE_df_logfold$LogFold<log(0.5,base=2))) # or whatever you want to use | |
| 582 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
| 583 sel1 <- which(TE_pval<=0.05) | |
| 584 sel=intersect(sel,sel1) | |
| 585 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="red") | |
| 586 sel <- which((TE_df_logfold$LogFold>log(2,base=2))|(TE_df_logfold$LogFold<log(0.5, base=2))) | |
| 587 sel1 <- which(TE_pval>0.05) | |
| 588 sel=intersect(sel,sel1) | |
| 589 points(TE_df_logfold[sel,"LogFold"], -log10(TE_pval[sel]),col="blue") | |
| 590 abline(h = -log(0.05,base=10), col="red", lty=2) | |
| 591 abline(v = log(2,base=2), col="red", lty=2) | |
| 592 abline(v = log(0.5,base=2), col="red", lty=2) | |
| 593 dev.off(); | |
| 594 | |
| 595 | |
| 596 | |
| 597 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Transcript Fold-Change</font></th><th><font color=#ffcc33>Protein Fold-Change</font></th></tr>\n', file = htmloutfile, append = TRUE); | |
| 598 cat("<tr><td align=center>", '<img src="TE_volcano.png" width=600 height=600></td>\n', file = htmloutfile, append = TRUE); | |
| 599 cat("<td align=center>", | |
| 600 '<img src="PE_volcano.png" width=600 height=600></td></tr></table><br>\n', | |
| 601 file = htmloutfile, append = TRUE); | |
| 602 }else{ | |
| 603 cat('<br><br><b><font color=red>!!! No replicates found. Cannot perform test to check significance of differential expression. Thus, no Volcano plot generated !!!</font></b><br><br>', | |
| 604 file = htmloutfile, append = TRUE); | |
| 605 } | |
| 606 | |
| 607 } | 754 } |
| 608 | 755 |
| 609 | 756 |
| 610 #*************************************************************************************************************************************** | 757 #*************************************************************************************************************************************** |
| 611 # Functions: End | 758 # Functions: End |
| 612 #*************************************************************************************************************************************** | 759 #*************************************************************************************************************************************** |
| 613 | |
| 614 | |
| 615 | 760 |
| 616 | 761 |
| 617 #=============================================================================== | 762 #=============================================================================== |
| 618 # Arguments | 763 # Arguments |
| 619 #=============================================================================== | 764 #=============================================================================== |
| 636 volc_with = args[10]; | 781 volc_with = args[10]; |
| 637 | 782 |
| 638 htmloutfile = args[11]; # html output file | 783 htmloutfile = args[11]; # html output file |
| 639 outdir = args[12]; # html supporting files | 784 outdir = args[12]; # html supporting files |
| 640 | 785 |
| 641 | |
| 642 #=============================================================================== | 786 #=============================================================================== |
| 643 # Check for file existance | 787 # Check for file existance |
| 644 #=============================================================================== | 788 #=============================================================================== |
| 645 if(! file.exists(proteome_file)) | 789 if(! file.exists(proteome_file)) |
| 646 { | 790 { |
| 659 suppressPackageStartupMessages(library(dplyr)); | 803 suppressPackageStartupMessages(library(dplyr)); |
| 660 suppressPackageStartupMessages(library(data.table)); | 804 suppressPackageStartupMessages(library(data.table)); |
| 661 suppressPackageStartupMessages(library(gplots)); | 805 suppressPackageStartupMessages(library(gplots)); |
| 662 suppressPackageStartupMessages(library(ggplot2)); | 806 suppressPackageStartupMessages(library(ggplot2)); |
| 663 suppressPackageStartupMessages(library(ggfortify)); | 807 suppressPackageStartupMessages(library(ggfortify)); |
| 808 suppressPackageStartupMessages(library(plotly)); | |
| 809 suppressPackageStartupMessages(library(d3heatmap)); | |
| 664 | 810 |
| 665 #=============================================================================== | 811 #=============================================================================== |
| 666 # Select mode and parse experiment design file | 812 # Select mode and parse experiment design file |
| 667 #=============================================================================== | 813 #=============================================================================== |
| 668 if(mode=="multiple") | 814 if(mode=="multiple") |
| 669 { | 815 { |
| 670 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); | 816 expDesign = fread(sampleinfo_file, header = FALSE, stringsAsFactors = FALSE, sep="\t") %>% data.frame(); |
| 671 expDesign_cc = expDesign[1:2,]; | 817 expDesign_cc = expDesign[1:2,]; |
| 672 | 818 |
| 673 sampleinfo_df = expDesign[3:nrow(expDesign),]; | 819 sampleinfo_df = expDesign[3:nrow(expDesign),]; |
| 674 rownames(sampleinfo_df)=1:nrow(sampleinfo_df); | 820 rownames(sampleinfo_df)=1:nrow(sampleinfo_df); |
| 675 colnames(sampleinfo_df) = c("Sample","Group"); | 821 colnames(sampleinfo_df) = c("Sample","Group"); |
| 676 | 822 |
| 677 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; | 823 condition_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),1]; |
| 678 condition_g_name = "case"; | 824 condition_g_name = "case"; |
| 679 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; | 825 control_cols = sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),1]; |
| 680 control_g_name = "control"; | 826 control_g_name = "control"; |
| 681 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; | 827 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="case"),2]),2] = "case"; |
| 682 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; | 828 sampleinfo_df[which(sampleinfo_df[,2]==expDesign_cc[which(expDesign_cc[,1]=="control"),2]),2] = "control"; |
| 683 sampleinfo_df_orig = sampleinfo_df; | 829 sampleinfo_df_orig = sampleinfo_df; |
| 684 } | 830 } |
| 685 | 831 |
| 686 if(mode=="logfold") | 832 if(mode=="logfold") |
| 687 { | 833 { |
| 688 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) | 834 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) |
| 689 } | 835 } |
| 690 | 836 |
| 691 #=============================================================================== | 837 #=============================================================================== |
| 692 # Parse Transcriptome data | 838 # Parse Transcriptome data |
| 693 #=============================================================================== | 839 #=============================================================================== |
| 694 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); | 840 TE_df_orig = fread(transcriptome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); |
| 695 if(mode=="multiple") | 841 if(mode=="multiple") |
| 696 { | 842 { |
| 697 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; | 843 TE_df = TE_df_orig[,c(colnames(TE_df_orig)[1],condition_cols,control_cols)]; |
| 698 } | 844 } |
| 699 if(mode=="logfold") | 845 if(mode=="logfold") |
| 700 { | 846 { |
| 701 TE_df = TE_df_orig; | 847 TE_df = TE_df_orig; |
| 702 colnames(TE_df) = c("Genes", "LogFold"); | 848 colnames(TE_df) = c("Genes", "LogFold"); |
| 703 } | 849 } |
| 704 #=============================================================================== | 850 #=============================================================================== |
| 705 # Parse Proteome data | 851 # Parse Proteome data |
| 706 #=============================================================================== | 852 #=============================================================================== |
| 707 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); | 853 PE_df_orig = fread(proteome_file, sep="\t", stringsAsFactor=F, header=T) %>% data.frame(); |
| 708 if(mode=="multiple") | 854 if(mode=="multiple") |
| 709 { | 855 { |
| 710 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; | 856 PE_df = PE_df_orig[,c(colnames(PE_df_orig)[1],condition_cols,control_cols)]; |
| 711 } | 857 } |
| 712 if(mode=="logfold") | 858 if(mode=="logfold") |
| 713 { | 859 { |
| 714 PE_df = PE_df_orig; | 860 PE_df = PE_df_orig; |
| 715 colnames(PE_df) = c("Genes", "LogFold"); | 861 colnames(PE_df) = c("Genes", "LogFold"); |
| 716 } | 862 } |
| 717 | 863 |
| 718 #============================================================================================================= | 864 #============================================================================================================= |
| 719 # Create directory structures and then set the working directory to output directory | 865 # Create directory structures and then set the working directory to output directory |
| 720 #============================================================================================================= | 866 #============================================================================================================= |
| 723 dir.create(outdir); | 869 dir.create(outdir); |
| 724 } | 870 } |
| 725 #=============================================================================== | 871 #=============================================================================== |
| 726 # Write initial data summary in html outfile | 872 # Write initial data summary in html outfile |
| 727 #=============================================================================== | 873 #=============================================================================== |
| 728 cat("<html><head></head><body>\n", file = htmloutfile); | 874 cat("<html><head></head><body>\n", file = htmloutfile); |
| 729 | 875 |
| 730 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n", | 876 cat("<h1><u>QuanTP: Association between abundance ratios of transcript and protein</u></h1><hr/>\n", |
| 731 "<font><h3>Input data summary</h3></font>\n", | 877 "<font><h3>Input data summary</h3></font>\n", |
| 732 "<ul>\n", | 878 "<ul>\n", |
| 733 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n", | 879 "<li>Abbreviations used: PE (Proteome data) and TE (Transcriptome data)","</li><br>\n", |
| 734 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n", | 880 "<li>Input Proteome data dimension (Row Column): ", dim(PE_df)[1]," x ", dim(PE_df)[2],"</li>\n", |
| 735 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n", | 881 "<li>Input Transcriptome data dimension (Row Column): ", dim(TE_df)[1]," x ", dim(TE_df)[2],"</li></ul><hr/>\n", |
| 736 file = htmloutfile, append = TRUE); | 882 file = htmloutfile, append = TRUE); |
| 737 | 883 |
| 738 cat("<h3 id=table_of_content>Table of Contents:</h3>\n", | 884 cat("<h3 id=table_of_content>Table of Contents:</h3>\n", |
| 739 "<ul>\n", | 885 "<ul>\n", |
| 740 "<li><a href=#sample_dist>Sample distribution</a></li>\n", | 886 "<li><a href=#sample_dist>Sample distribution</a></li>\n", |
| 741 "<li><a href=#corr_data>Correlation</a></li>\n", | 887 "<li><a href=#corr_data>Correlation</a></li>\n", |
| 742 "<li><a href=#regression_data>Regression analysis</a></li>\n", | 888 "<li><a href=#regression_data>Regression analysis</a></li>\n", |
| 743 "<li><a href=#inf_obs>Influential observations</a></li>\n", | 889 "<li><a href=#inf_obs>Influential observations</a></li>\n", |
| 791 PE_nacount = sum(is.na(PE_df)); | 937 PE_nacount = sum(is.na(PE_df)); |
| 792 | 938 |
| 793 TE_df[is.na(TE_df)] = 0; | 939 TE_df[is.na(TE_df)] = 0; |
| 794 PE_df[is.na(PE_df)] = 0; | 940 PE_df[is.na(PE_df)] = 0; |
| 795 | 941 |
| 942 #=============================================================================== | |
| 943 # Obtain JS/HTML lines for interactive visualization | |
| 944 #=============================================================================== | |
| 945 extractWidgetCode = function(outplot){ | |
| 946 lines <- readLines(gsub("\\.png", "\\.html", outplot)) | |
| 947 return(list( | |
| 948 'prescripts' = c('', | |
| 949 gsub('script', 'script', | |
| 950 lines[grep('<head>',lines) + 3 | |
| 951 :grep('</head>' ,lines) - 5]), | |
| 952 ''), | |
| 953 'widget_div' = paste('<!--', | |
| 954 gsub('width:100%;height:400px', | |
| 955 'width:500px;height:500px', | |
| 956 lines[grep(lines, pattern='html-widget')]), | |
| 957 '-->', sep=''), | |
| 958 'postscripts' = paste('', | |
| 959 gsub('script', 'script', | |
| 960 lines[grep(lines, pattern='<script type')]), | |
| 961 '', sep=''))); | |
| 962 } | |
| 963 prescripts <- list() | |
| 964 postscripts <- list() | |
| 965 | |
| 796 | 966 |
| 797 #=============================================================================== | 967 #=============================================================================== |
| 798 # Decide based on analysis mode | 968 # Decide based on analysis mode |
| 799 #=============================================================================== | 969 #=============================================================================== |
| 800 if(mode=="logfold") | 970 if(mode=="logfold") |
| 801 { | 971 { |
| 802 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', | 972 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', |
| 803 file = htmloutfile, append = TRUE); | 973 file = htmloutfile, append = TRUE); |
| 804 | 974 |
| 805 # TE Boxplot | 975 # TE Boxplot |
| 806 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); | 976 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); |
| 807 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | 977 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', |
| 808 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | 978 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', |
| 809 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | 979 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); |
| 810 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data"); | 980 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance data"); |
| 811 | 981 |
| 812 # PE Boxplot | 982 # PE Boxplot |
| 813 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); | 983 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); |
| 814 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | 984 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); |
| 815 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data"); | 985 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance data"); |
| 816 | 986 |
| 817 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', | 987 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', |
| 818 file = htmloutfile, append = TRUE); | 988 file = htmloutfile, append = TRUE); |
| 819 | 989 |
| 820 # TE PE scatter | 990 # TE PE scatter |
| 821 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); | 991 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); |
| 822 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); | 992 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); |
| 823 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800></td></tr>\n', file = htmloutfile, append = TRUE); | 993 singlesample_scatter(PE_TE_data, outplot); |
| 994 lines <- extractWidgetCode(outplot); | |
| 995 postscripts <- c(postscripts, lines$postscripts); | |
| 996 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', lines$widget_div, '</td></tr>\n', file = htmloutfile, append = TRUE); | |
| 824 PE_TE_data = data.frame(PE_df, TE_df); | 997 PE_TE_data = data.frame(PE_df, TE_df); |
| 825 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); | 998 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); |
| 826 singlesample_scatter(PE_TE_data, outplot); | |
| 827 | 999 |
| 828 # TE PE Cor | 1000 # TE PE Cor |
| 829 cat("<tr><td align=center>", file = htmloutfile, append = TRUE); | 1001 cat("<tr><td align=center>", file = htmloutfile, append = TRUE); |
| 830 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); | 1002 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); |
| 831 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', | 1003 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', |
| 832 file = htmloutfile, append = TRUE); | 1004 file = htmloutfile, append = TRUE); |
| 833 cat('</td></table>', | 1005 cat('</td></table>', |
| 834 file = htmloutfile, append = TRUE); | 1006 file = htmloutfile, append = TRUE); |
| 835 | 1007 |
| 836 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', | 1008 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', |
| 837 file = htmloutfile, append = TRUE); | 1009 file = htmloutfile, append = TRUE); |
| 838 | 1010 |
| 839 # TE PE Regression | 1011 # TE PE Regression |
| 840 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); | 1012 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); |
| 1013 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts, | |
| 1014 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts, | |
| 1015 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts, | |
| 1016 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts, | |
| 1017 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts)); | |
| 841 | 1018 |
| 842 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', | 1019 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', |
| 843 file = htmloutfile, append = TRUE); | 1020 file = htmloutfile, append = TRUE); |
| 844 | 1021 |
| 845 # TE PE Heatmap | 1022 # TE PE Heatmap |
| 846 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); | 1023 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); |
| 1024 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="")) | |
| 1025 postscripts <- c(postscripts, lines$postscripts) | |
| 1026 prescripts <- c(prescripts, lines$prescripts) | |
| 847 | 1027 |
| 848 | 1028 |
| 849 # TE PE Clustering (kmeans) | 1029 # TE PE Clustering (kmeans) |
| 850 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) | 1030 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) |
| 1031 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts) | |
| 851 | 1032 |
| 852 }else{ | 1033 }else{ |
| 853 if(mode=="multiple") | 1034 if(mode=="multiple") |
| 854 { | 1035 { |
| 855 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', | 1036 cat('<h2 id="sample_dist"><font color=#ff0000>SAMPLE DISTRIBUTION</font></h2>\n', |
| 856 file = htmloutfile, append = TRUE); | 1037 file = htmloutfile, append = TRUE); |
| 857 | 1038 |
| 858 # TE Boxplot | 1039 # TE Boxplot |
| 859 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape=""); | 1040 outplot = paste(outdir,"/Box_TE_all_rep.png",sep="",collape=""); |
| 860 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | |
| 861 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
| 862 "<tr><td align=center>", '<img src="Box_TE_all_rep.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | |
| 863 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); | 1041 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); |
| 864 colnames(temp_df_te_data) = colnames(TE_df); | 1042 colnames(temp_df_te_data) = colnames(TE_df); |
| 865 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)"); | 1043 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "Yes", "Samples", "Transcript Abundance (log)"); |
| 1044 lines <- extractWidgetCode(outplot) | |
| 1045 prescripts <- c(prescripts, lines$prescripts) | |
| 1046 postscripts <- c(postscripts, lines$postscripts) | |
| 1047 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; ">\n', | |
| 1048 '<tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
| 1049 "<tr><td align=center>", file = htmloutfile, append = TRUE); | |
| 1050 cat('<img src="Box_TE_all_rep.png" width=500 height=500>', | |
| 1051 lines$widget_div, '</td>', file = htmloutfile, append = TRUE); | |
| 866 | 1052 |
| 867 # PE Boxplot | 1053 # PE Boxplot |
| 868 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape=""); | 1054 outplot = paste(outdir,"/Box_PE_all_rep.png",sep="",collape=""); |
| 869 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | |
| 870 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); | 1055 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); |
| 871 colnames(temp_df_pe_data) = colnames(PE_df); | 1056 colnames(temp_df_pe_data) = colnames(PE_df); |
| 872 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)"); | 1057 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "Yes", "Samples", "Protein Abundance (log)"); |
| 1058 lines <- extractWidgetCode(outplot) | |
| 1059 #prescripts <- c(prescripts, lines$prescripts) | |
| 1060 postscripts <- c(postscripts, lines$postscripts) | |
| 1061 cat("<td align=center>", '<img src="Box_PE_all_rep.png" width=500 height=500>', | |
| 1062 lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE); | |
| 873 | 1063 |
| 874 # Calc TE PCA | 1064 # Calc TE PCA |
| 875 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape=""); | 1065 outplot = paste(outdir,"/PCA_TE_all_rep.png",sep="",collape=""); |
| 876 multisample_PCA(TE_df, sampleinfo_df, outplot); | 1066 multisample_PCA(TE_df, sampleinfo_df, outplot); |
| 877 | 1067 PCA_TE <- extractWidgetCode(outplot) |
| 1068 postscripts <- c(postscripts, PCA_TE$postscripts) | |
| 1069 | |
| 878 # Calc PE PCA | 1070 # Calc PE PCA |
| 879 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape=""); | 1071 outplot = paste(outdir,"/PCA_PE_all_rep.png",sep="",collape=""); |
| 880 multisample_PCA(PE_df, sampleinfo_df, outplot); | 1072 multisample_PCA(PE_df, sampleinfo_df, outplot); |
| 881 | 1073 PCA_PE <- extractWidgetCode(outplot) |
| 1074 postscripts <- c(postscripts, PCA_PE$postscripts) | |
| 882 | 1075 |
| 883 # Replicate mode | 1076 # Replicate mode |
| 884 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method); | 1077 templist = mergeReplicates(TE_df,PE_df, sampleinfo_df, method); |
| 885 TE_df = templist$TE_df_merged; | 1078 TE_df = templist$TE_df_merged; |
| 886 PE_df = templist$PE_df_merged; | 1079 PE_df = templist$PE_df_merged; |
| 887 sampleinfo_df = templist$sampleinfo_df_merged; | 1080 sampleinfo_df = templist$sampleinfo_df_merged; |
| 888 rownames(sampleinfo_df) = sampleinfo_df[,1]; | 1081 rownames(sampleinfo_df) = sampleinfo_df[,1]; |
| 889 | 1082 |
| 890 # TE Boxplot | 1083 # TE Boxplot |
| 891 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape=""); | 1084 outplot = paste(outdir,"/Box_TE_rep.png",sep="",collape=""); |
| 892 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
| 893 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | |
| 894 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); | 1085 temp_df_te_data = data.frame(TE_df[,1], log(TE_df[,2:length(TE_df)])); |
| 895 colnames(temp_df_te_data) = colnames(TE_df); | 1086 colnames(temp_df_te_data) = colnames(TE_df); |
| 896 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)"); | 1087 multisample_boxplot(temp_df_te_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Transcript Abundance (log)"); |
| 897 | 1088 lines <- extractWidgetCode(outplot) |
| 1089 #prescripts <- c(prescripts, lines$prescripts) | |
| 1090 postscripts <- c(postscripts, lines$postscripts) | |
| 1091 cat('<br><font color="#ff0000"><h3>Sample wise distribution (Box plot) after using ',method,' on replicates </h3></font><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | |
| 1092 "<tr><td align=center>", '<img src="Box_TE_rep.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE); | |
| 1093 | |
| 898 # PE Boxplot | 1094 # PE Boxplot |
| 899 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape=""); | 1095 outplot = paste(outdir,"/Box_PE_rep.png",sep="",collape=""); |
| 900 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | |
| 901 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); | 1096 temp_df_pe_data = data.frame(PE_df[,1], log(PE_df[,2:length(PE_df)])); |
| 902 colnames(temp_df_pe_data) = colnames(PE_df); | 1097 colnames(temp_df_pe_data) = colnames(PE_df); |
| 903 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)"); | 1098 multisample_boxplot(temp_df_pe_data, sampleinfo_df, outplot, "No", "Sample Groups", "Mean Protein Abundance (log)"); |
| 904 | 1099 lines <- extractWidgetCode(outplot) |
| 1100 #prescripts <- c(prescripts, lines$prescripts) | |
| 1101 postscripts <- c(postscripts, lines$postscripts) | |
| 1102 cat("<td align=center>", '<img src="Box_PE_rep.png" width=500 height=500>', lines$widget_div, '</td></tr></table>\n', file = htmloutfile, append = TRUE); | |
| 1103 | |
| 905 #=============================================================================== | 1104 #=============================================================================== |
| 906 # Calculating log fold change and running the "single" code part | 1105 # Calculating log fold change and running the "single" code part |
| 907 #=============================================================================== | 1106 #=============================================================================== |
| 908 | 1107 |
| 909 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); | 1108 TE_df = data.frame("Genes"=TE_df[,1], "LogFold"=apply(TE_df[,c(which(colnames(TE_df)==condition_g_name),which(colnames(TE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); |
| 910 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); | 1109 PE_df = data.frame("Genes"=PE_df[,1], "LogFold"=apply(PE_df[,c(which(colnames(PE_df)==condition_g_name),which(colnames(PE_df)==control_g_name))],1,function(x) log(x[1]/x[2],base=2))); |
| 911 | 1110 |
| 912 #=============================================================================== | 1111 #=============================================================================== |
| 913 # Treat missing values | 1112 # Treat missing values |
| 914 #=============================================================================== | 1113 #=============================================================================== |
| 915 | 1114 |
| 916 TE_df[is.infinite(TE_df[,2]),2] = NA; | 1115 TE_df[is.infinite(TE_df[,2]),2] = NA; |
| 917 PE_df[is.infinite(PE_df[,2]),2] = NA; | 1116 PE_df[is.infinite(PE_df[,2]),2] = NA; |
| 918 TE_df[is.na(TE_df)] = 0; | 1117 TE_df[is.na(TE_df)] = 0; |
| 919 PE_df[is.na(PE_df)] = 0; | 1118 PE_df[is.na(PE_df)] = 0; |
| 920 | 1119 |
| 921 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) | 1120 sampleinfo_df = data.frame("Sample"= c("LogFold"), "Group"=c("Fold_Change")) |
| 922 #=============================================================================== | 1121 #=============================================================================== |
| 923 # Find common samples | 1122 # Find common samples |
| 924 #=============================================================================== | 1123 #=============================================================================== |
| 925 | 1124 |
| 926 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]); | 1125 common_samples = intersect(sampleinfo_df[,1], colnames(TE_df)[-1]) %>% intersect(., colnames(PE_df)[-1]); |
| 927 TE_df = select(TE_df, 1, common_samples); | 1126 TE_df = select(TE_df, 1, common_samples); |
| 928 PE_df = select(PE_df, 1, common_samples); | 1127 PE_df = select(PE_df, 1, common_samples); |
| 929 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples); | 1128 sampleinfo_df = filter(sampleinfo_df, Sample %in% common_samples); |
| 930 rownames(sampleinfo_df) = sampleinfo_df[,1]; | 1129 rownames(sampleinfo_df) = sampleinfo_df[,1]; |
| 931 | 1130 |
| 932 # TE Boxplot | 1131 # TE Boxplot |
| 933 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); | 1132 outplot = paste(outdir,"/Box_TE.png",sep="",collape=""); |
| 1133 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)"); | |
| 1134 lines <- extractWidgetCode(outplot) | |
| 1135 postscripts <- c(postscripts, lines$postscripts) | |
| 934 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE); | 1136 cat('<br><font color="#ff0000"><h3>Distribution (Box plot) of log fold change </h3></font>', file = htmloutfile, append = TRUE); |
| 935 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', | 1137 cat('<table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Boxplot: Transcriptome data</font></th><th><font color=#ffcc33>Boxplot: Proteome data</font></th></tr>\n', |
| 936 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500></td>\n', file = htmloutfile, append = TRUE); | 1138 "<tr><td align=center>", '<img src="Box_TE.png" width=500 height=500>', lines$widget_div, '</td>\n', file = htmloutfile, append = TRUE); |
| 937 multisample_boxplot(TE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Transcript Abundance fold-change (log2)"); | |
| 938 | 1139 |
| 939 # PE Boxplot | 1140 # PE Boxplot |
| 940 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); | 1141 outplot = paste(outdir,"/Box_PE.png",sep="",collape=""); |
| 941 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500></td></tr></table>\n', file = htmloutfile, append = TRUE); | |
| 942 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)"); | 1142 multisample_boxplot(PE_df, sampleinfo_df, outplot, "Yes", "Sample (log2(case/control))", "Protein Abundance fold-change(log2)"); |
| 1143 lines <- extractWidgetCode(outplot) | |
| 1144 postscripts <- c(postscripts, lines$postscripts) | |
| 1145 cat("<td align=center>", '<img src="Box_PE.png" width=500 height=500>', lines$widget_div,'</td></tr></table>\n', file = htmloutfile, append = TRUE); | |
| 943 | 1146 |
| 944 | 1147 |
| 945 # Log Fold Data | 1148 # Log Fold Data |
| 946 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with) | 1149 perform_Test_Volcano(TE_df_orig,PE_df_orig,TE_df, PE_df,sampleinfo_df_orig,method,correction_method,volc_with) |
| 947 | 1150 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/TE_volcano.png",sep="",collapse=""))$postscripts) |
| 948 | 1151 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_volcano.png",sep="",collapse=""))$postscripts) |
| 949 | 1152 |
| 950 # Print PCA | 1153 # Print PCA |
| 951 | 1154 |
| 952 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n', | 1155 cat('<br><br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>PCA plot: Transcriptome data</font></th><th><font color=#ffcc33>PCA plot: Proteome data</font></th></tr>\n', |
| 953 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500></td>\n', | 1156 "<tr><td align=center>", '<img src="PCA_TE_all_rep.png" width=500 height=500>', PCA_TE$widget_div, '</td>\n', |
| 954 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500></td></tr></table>\n', | 1157 "<td align=center>", '<img src="PCA_PE_all_rep.png" width=500 height=500>', PCA_PE$widget_div, '</td></tr></table>\n', |
| 955 file = htmloutfile, append = TRUE); | 1158 file = htmloutfile, append = TRUE); |
| 956 | 1159 |
| 957 | 1160 |
| 958 | 1161 |
| 959 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', | 1162 cat('<hr/><h2 id="corr_data"><font color=#ff0000>CORRELATION</font></h2>\n', |
| 960 file = htmloutfile, append = TRUE); | 1163 file = htmloutfile, append = TRUE); |
| 1164 | |
| 1165 PE_TE_data = data.frame(PE_df, TE_df); | |
| 1166 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); | |
| 961 | 1167 |
| 962 # TE PE scatter | 1168 # TE PE scatter |
| 963 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); | 1169 outplot = paste(outdir,"/TE_PE_scatter.png",sep="",collape=""); |
| 964 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); | 1170 cat('<br><table border=1 cellspacing=0 cellpadding=5 style="table-layout:auto; "> <tr bgcolor="#7a0019"><th><font color=#ffcc33>Scatter plot between Proteome and Transcriptome Abundance</font></th></tr>\n', file = htmloutfile, append = TRUE); |
| 965 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800></td>\n', file = htmloutfile, append = TRUE); | |
| 966 PE_TE_data = data.frame(PE_df, TE_df); | |
| 967 colnames(PE_TE_data) = c("PE_ID","PE_abundance","TE_ID","TE_abundance"); | |
| 968 singlesample_scatter(PE_TE_data, outplot); | 1171 singlesample_scatter(PE_TE_data, outplot); |
| 969 | 1172 lines <- extractWidgetCode(outplot); |
| 1173 postscripts <- c(postscripts, lines$postscripts); | |
| 1174 cat("<tr><td align=center>", '<img src="TE_PE_scatter.png" width=800 height=800>', gsub('width:500px;height:500px', 'width:800px;height:800px' , lines$widget_div), | |
| 1175 '</td>\n', file = htmloutfile, append = TRUE); | |
| 1176 | |
| 970 # TE PE Cor | 1177 # TE PE Cor |
| 971 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE); | 1178 cat("<tr><td align=center>\n", file = htmloutfile, append = TRUE); |
| 972 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); | 1179 singlesample_cor(PE_TE_data, htmloutfile, append=TRUE); |
| 973 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', | 1180 cat('<font color="red">*Note that <u>correlation</u> is <u>sensitive to outliers</u> in the data. So it is important to analyze outliers/influential observations in the data.<br> Below we use <u>Cook\'s distance based approach</u> to identify such influential observations.</font>\n', |
| 974 file = htmloutfile, append = TRUE); | 1181 file = htmloutfile, append = TRUE); |
| 975 cat('</td></table>', | 1182 cat('</td></table>', |
| 976 file = htmloutfile, append = TRUE); | 1183 file = htmloutfile, append = TRUE); |
| 977 | 1184 |
| 978 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', | 1185 cat('<hr/><h2 id="regression_data"><font color=#ff0000>REGRESSION ANALYSIS</font></h2>\n', |
| 979 file = htmloutfile, append = TRUE); | 1186 file = htmloutfile, append = TRUE); |
| 980 | 1187 |
| 981 # TE PE Regression | 1188 # TE PE Regression |
| 982 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); | 1189 singlesample_regression(PE_TE_data,htmloutfile, append=TRUE); |
| 1190 postscripts <- c(postscripts, c(extractWidgetCode(paste(outdir,"/PE_TE_lm_1.png",sep="",collapse=""))$postscripts, | |
| 1191 extractWidgetCode(paste(outdir,"/PE_TE_lm_2.png",sep="",collapse=""))$postscripts, | |
| 1192 extractWidgetCode(paste(outdir,"/PE_TE_lm_5.png",sep="",collapse=""))$postscripts, | |
| 1193 extractWidgetCode(paste(outdir,"/PE_TE_lm_cooksd.png",sep="",collapse=""))$postscripts, | |
| 1194 extractWidgetCode(paste(outdir,"/AbundancePlot_scatter_without_outliers.png",sep="",collapse=""))$postscripts, | |
| 1195 gsub('data-for="html', 'data-for="secondhtml"', | |
| 1196 extractWidgetCode(paste(outdir,"/TE_PE_scatter.png",sep="",collapse=""))$postscripts))); | |
| 983 | 1197 |
| 984 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', | 1198 cat('<hr/><h2 id="cluster_data"><font color=#ff0000>CLUSTER ANALYSIS</font></h2>\n', |
| 985 file = htmloutfile, append = TRUE); | 1199 file = htmloutfile, append = TRUE); |
| 986 | 1200 |
| 987 #TE PE Heatmap | 1201 #TE PE Heatmap |
| 988 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); | 1202 singlesample_heatmap(PE_TE_data, htmloutfile, hm_nclust); |
| 1203 lines <- extractWidgetCode(paste(outdir,"/PE_TE_heatmap.png",sep="",collapse="")) | |
| 1204 postscripts <- c(postscripts, lines$postscripts) | |
| 1205 prescripts <- c(prescripts, lines$prescripts) | |
| 989 | 1206 |
| 990 #TE PE Clustering (kmeans) | 1207 #TE PE Clustering (kmeans) |
| 991 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) | 1208 singlesample_kmeans(PE_TE_data, htmloutfile, nclust=as.numeric(numCluster)) |
| 992 | 1209 postscripts <- c(postscripts, extractWidgetCode(paste(outdir,"/PE_TE_kmeans.png",sep="",collapse=""))$postscripts); |
| 993 } | 1210 } |
| 994 } | 1211 } |
| 995 cat("<h3>Go To:</h3>\n", | 1212 cat("<h3>Go To:</h3>\n", |
| 996 "<ul>\n", | 1213 "<ul>\n", |
| 997 "<li><a href=#sample_dist>Sample distribution</a></li>\n", | 1214 "<li><a href=#sample_dist>Sample distribution</a></li>\n", |
| 1000 "<li><a href=#inf_obs>Influential observations</a></li>\n", | 1217 "<li><a href=#inf_obs>Influential observations</a></li>\n", |
| 1001 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n", | 1218 "<li><a href=#cluster_data>Cluster analysis</a></li></ul>\n", |
| 1002 "<br><a href=#>TOP</a>", | 1219 "<br><a href=#>TOP</a>", |
| 1003 file = htmloutfile, append = TRUE); | 1220 file = htmloutfile, append = TRUE); |
| 1004 cat("</body></html>\n", file = htmloutfile, append = TRUE); | 1221 cat("</body></html>\n", file = htmloutfile, append = TRUE); |
| 1222 | |
| 1223 | |
| 1224 #=============================================================================== | |
| 1225 # Add masked-javascripts tags to HTML file in the head and end | |
| 1226 #=============================================================================== | |
| 1227 | |
| 1228 htmllines <- readLines(htmloutfile) | |
| 1229 htmllines[1] <- paste('<html>\n<head>\n', paste(prescripts, collapse='\n'), '\n</head>\n<body>') | |
| 1230 cat(paste(htmllines, collapse='\n'), file = htmloutfile) | |
| 1231 cat('\n', paste(postscripts, collapse='\n'), "\n", | |
| 1232 "</body>\n</html>\n", file = htmloutfile, append = TRUE); |
