music_deconvolution: scripts/estimateprops.R comparison

comparison scripts/estimateprops.R @ 3:064056f47ff2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 20f8561478535013e111d982b99639f48f1bea79"

author	bgruening
date	Sat, 29 Jan 2022 12:49:59 +0000
parents	f7ade72ba78c
children	46fb28053b12

comparison

equal deleted inserted replaced

-:1eb4f2f83579
+:064056f47ff2
 samples = samples_label, select.ct = celltypes, verbose = T)
 estimated_music_props <- est_prop$Est.prop.weighted
 estimated_nnls_props <- est_prop$Est.prop.allgene
+scale_yaxes <- function(gplot, value) {
+if (is.na(value)) {
+gplot
+} else {
+gplot + scale_y_continuous(lim = c(0, value))
+}
+}
 ## Show different in estimation methods
 ## Jitter plot of estimated cell type proportions
-jitter_fig <- Jitter_Est(
+jitter_fig <- scale_yaxes(Jitter_Est(
 list(data.matrix(estimated_music_props),
 data.matrix(estimated_nnls_props)),
 method.name = methods, title = "Jitter plot of Est Proportions",
-size = 2, alpha = 0.7) + theme_minimal()
+size = 2, alpha = 0.7) + theme_minimal(), maxyscale)
 ## Make a Plot
 ## A more sophisticated jitter plot is provided as below. We separated
 ## the T2D subjects and normal subjects by their disease factor levels.
 celltypes <- levels(m_prop$CellType)
 message("No celltypes declared, using:")
 message(celltypes)
 }
-if (phenotype_target_threshold == -99) {
-phenotype_target_threshold <- -Inf
-message("phenotype target threshold set to -Inf")
-}
 if (is.null(phenotype_factors)) {
 phenotype_factors <- colnames(pData(bulk_eset))
 }
 ## filter out unwanted factors like "sampleID" and "subjectName"
 phenotype_factors <- phenotype_factors[
 !(phenotype_factors %in% phenotype_factors_always_exclude)]
 message("Phenotype Factors to use:")
-message(phenotype_factors)
+message(paste0(phenotype_factors, collapse = ", "))
 m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint
 m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint
 levels = methods)
-m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint
-m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]
+if (use_disease_factor) {
-## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2
-sample_groups <- c("Normal", sample_disease_group)
+if (phenotype_target_threshold == -99) {
-m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint
+phenotype_target_threshold <- -Inf
-levels = sample_groups)
+message("phenotype target threshold set to -Inf")
+}
-## Binary to scale: e.g. TRUE / 5 = 0.2
-m_prop$D <- (m_prop$Disease ==   # nolint
+m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint
-sample_disease_group) / sample_disease_group_scale
+m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]
-## NA's are not included in the comparison below
+## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2
-m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),
+sample_groups <- c("Normal", sample_disease_group)
-subset(m_prop, Disease == sample_disease_group))
+m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint
+levels = sample_groups)
-jitter_new <- ggplot(m_prop, aes(Method, Prop)) +
-geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
+## Binary to scale: e.g. TRUE / 5 = 0.2
-size = 2, alpha = 0.7,
+m_prop$D <- (m_prop$Disease ==   # nolint
-position = position_jitter(width = 0.25, height = 0)) +
-facet_wrap(~ CellType, scales = "free") +
-scale_colour_manual(values = c("white", "gray20")) +
-scale_shape_manual(values = c(21, 24)) + theme_minimal()
-## Plot to compare method effectiveness
-## Create dataframe for beta cell proportions and Disease_factor levels
-m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint
-phenotype_factors],
-ct.prop = c(estimated_music_props[, 2],
-estimated_nnls_props[, 2]),
-Method = factor(rep(methods,
-each = nrow(estimated_music_props)),
-levels = methods))
-colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint
-m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target]))
-m_prop_ana$Disease <- factor(sample_groups[(  # nolint
-m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1],
-sample_groups)
-m_prop_ana$D <- (m_prop_ana$Disease ==        # nolint
 sample_disease_group) / sample_disease_group_scale
+## NA's are not included in the comparison below
-jitt_compare <- ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) +
+m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),
-geom_smooth(method = "lm",  se = FALSE, col = "black", lwd = 0.25) +
+subset(m_prop, Disease == sample_disease_group))
-geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
-size = 2, alpha = 0.7) +  facet_wrap(~ Method) +
+jitter_new <- scale_yaxes(ggplot(m_prop, aes(Method, Prop)) +
-ggtitle(compare_title) + theme_minimal() +
+geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
-scale_colour_manual(values = c("white", "gray20")) +
+size = 2, alpha = 0.7,
-scale_shape_manual(values = c(21, 24))
+position = position_jitter(width = 0.25, height = 0)) +
+facet_wrap(~ CellType, scales = "free") +
+scale_colour_manual(values = c("white", "gray20")) +
+scale_shape_manual(values = c(21, 24)) + theme_minimal(), maxyscale)
+}
+if (use_disease_factor) {
+## Plot to compare method effectiveness
+## Create dataframe for beta cell proportions and Disease_factor levels
+## - Ugly code. Essentially, doubles the cell type proportions for each
+##   set of MuSiC and NNLS methods
+m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint
+phenotype_factors],
+## get proportions of target cell type
+ct.prop = c(estimated_music_props[, phenotype_scrna_target],
+estimated_nnls_props[, phenotype_scrna_target]),
+##
+Method = factor(rep(methods,
+each = nrow(estimated_music_props)),
+levels = methods))
+## - fix headers
+colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint
+## - drop NA for target phenotype (e.g. hba1c)
+m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target]))
+m_prop_ana$Disease <- factor(   # nolint
+## - Here we set Normal/Disease assignments across the two MuSiC and NNLS methods
+sample_groups[(
+m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1
+],
+sample_groups)
+## - Then we scale this binary assignment to a plotable factor
+m_prop_ana$D <- (m_prop_ana$Disease ==        # nolint
+sample_disease_group) / sample_disease_group_scale
+jitt_compare <- scale_yaxes(ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) +
+geom_smooth(method = "lm",  se = FALSE, col = "black", lwd = 0.25) +
+geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
+size = 2, alpha = 0.7) +  facet_wrap(~ Method) +
+ggtitle(paste0(toupper(phenotype_target), " vs. ",
+toupper(phenotype_scrna_target), " Cell Type Proportion")) +
+theme_minimal() +
+ylab(paste0("Proportion of ",
+phenotype_scrna_target, " cells")) +
+xlab(paste0("Level of bulk factor (", phenotype_target, ")")) +
+scale_colour_manual(values = c("white", "gray20")) +
+scale_shape_manual(values = c(21, 24)), maxyscale)
+}
 ## BoxPlot
-plot_box <- Boxplot_Est(list(
+plot_box <- scale_yaxes(Boxplot_Est(list(
 data.matrix(estimated_music_props),
 data.matrix(estimated_nnls_props)),
 method.name = c("MuSiC", "NNLS")) +
 theme(axis.text.x = element_text(angle = -90),
 axis.text.y = element_text(size = 8)) +
-ggtitle(element_blank()) + theme_minimal()
+ggtitle(element_blank()) + theme_minimal(), maxyscale)
 ## Heatmap
 plot_hmap <- Prop_heat_Est(list(
 data.matrix(estimated_music_props),
 data.matrix(estimated_nnls_props)),
 method.name = c("MuSiC", "NNLS")) +
 theme(axis.text.x = element_text(angle = -90),
 axis.text.y = element_text(size = 6))
 pdf(file = outfile_pdf, width = 8, height = 8)
-plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)
+if (length(celltypes) <= 8) {
-plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)
+plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)
+} else {
+print(jitter_fig)
+plot_box
+}
+if (use_disease_factor) {
+plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)
+}
 plot_hmap
 message(dev.off())
 ## Output Proportions
 "Matrix of Variance of MuSiC Estimates",
 ".tabular"),
 quote = F, sep = "\t", col.names = NA)
-## Summary table
+if (use_disease_factor) {
-for (meth in methods) {
+## Summary table of linear regressions of disease factors
-##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =
+for (meth in methods) {
-sub_data <- subset(m_prop_ana, Method == meth)
+##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =
-## We can only do regression where there are more than 1 factors
+sub_data <- subset(m_prop_ana, Method == meth)
-## so we must find and exclude the ones which are not
-gt1_facts <- sapply(phenotype_factors, function(facname) {
+## We can only do regression where there are more than 1 factors
-return(length(unique(sort(sub_data[[facname]]))) == 1)
+## so we must find and exclude the ones which are not
-})
+gt1_facts <- sapply(phenotype_factors, function(facname) {
-form_factors <- phenotype_factors
+return(length(unique(sort(sub_data[[facname]]))) == 1)
-exclude_facts <- names(gt1_facts)[gt1_facts]
+})
-if (length(exclude_facts) > 0) {
+form_factors <- phenotype_factors
-message("Factors with only one level will be excluded:")
+exclude_facts <- names(gt1_facts)[gt1_facts]
-message(exclude_facts)
+if (length(exclude_facts) > 0) {
-form_factors <- phenotype_factors[
+message("Factors with only one level will be excluded:")
-!(phenotype_factors %in% exclude_facts)]
+message(exclude_facts)
+form_factors <- phenotype_factors[
+!(phenotype_factors %in% exclude_facts)]
+}
+lm_beta_meth <- lm(as.formula(
+paste("ct.prop", paste(form_factors, collapse = " + "),
+sep = " ~ ")), data = sub_data)
+message(paste0("Summary: ", meth))
+capture.output(summary(lm_beta_meth),
+file = paste0("report_data/summ_Log of ",
+meth,
+" fitting.txt"))
 }
-lm_beta_meth <- lm(as.formula(
+}
-paste("ct.prop", paste(form_factors, collapse = " + "),
-sep = " ~ ")), data = sub_data)
-message(paste0("Summary: ", meth))
-capture.output(summary(lm_beta_meth),
-file = paste0("report_data/summ_Log of ",
-meth,
-" fitting.txt"))
-}

Mercurial > repos > bgruening > music_deconvolution

comparison scripts/estimateprops.R @ 3:064056f47ff2 draft