Mercurial > repos > eschen42 > mqppep_preproc
comparison mqppep_anova_script.Rmd @ 31:e103de3e41e6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 7e8b616a2519c8faeb6fd743a8fb73f271f655f9
| author | eschen42 |
|---|---|
| date | Fri, 09 Dec 2022 21:06:08 +0000 |
| parents | 87794cf65bc0 |
| children | 072fe8228dfa |
comparison
equal
deleted
inserted
replaced
| 30:87794cf65bc0 | 31:e103de3e41e6 |
|---|---|
| 4 - "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]" | 4 - "Nick Graham^[ORCiD 0000-0002-6811-1941, University of Southern California: Los Angeles, CA, US]" |
| 5 - "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]" | 5 - "Larry Cheng^[ORCiD 0000-0002-6922-6433, Rutgers School of Graduate Studies: New Brunswick, NJ, US]" |
| 6 - "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]" | 6 - "Art Eschenlauer^[ORCiD 0000-0002-2882-0508, University of Minnesota: Minneapolis, Minnesota, US]" |
| 7 date: | 7 date: |
| 8 - "May 28, 2018" | 8 - "May 28, 2018" |
| 9 - "; revised June 23, 2022" | 9 - "; revised December 7, 2022" |
| 10 lot: true | 10 lot: true |
| 11 output: | 11 output: |
| 12 pdf_document: | 12 pdf_document: |
| 13 toc: true | 13 toc: true |
| 14 toc_depth: 2 | 14 toc_depth: 2 |
| 95 # look-up tables for kinase descriptions | 95 # look-up tables for kinase descriptions |
| 96 kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2" | 96 kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2" |
| 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" | 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" |
| 98 # should debugging trace messages be printed? | 98 # should debugging trace messages be printed? |
| 99 showEnrichedSubstrates: FALSE | 99 showEnrichedSubstrates: FALSE |
| 100 | |
| 101 # should debugging nb/nbe messages be printed? | 100 # should debugging nb/nbe messages be printed? |
| 102 printNBMsgs: FALSE | 101 printNBMsgs: FALSE |
| 102 # showld row-scaling be applied to heatmaps: "none" or "row" | |
| 103 defaultHeatMapRowScaling: "none" | |
| 103 # should debugging trace messages be printed? | 104 # should debugging trace messages be printed? |
| 104 printTraceMsgs: FALSE | 105 printTraceMsgs: FALSE |
| 105 # when debugging files are needed, set debugFileBasePath to the path | 106 # when debugging files are needed, set debugFileBasePath to the path |
| 106 # to the directory where they should be writtn | 107 # to the directory where they should be written |
| 107 debugFileBasePath: !r if (TRUE) NULL else "test-data" | 108 debugFileBasePath: !r if (TRUE) NULL else "test-data" |
| 108 --- | 109 --- |
| 109 | 110 |
| 110 ```{r setup, include = FALSE, results = 'asis'} | 111 ```{r setup, include = FALSE, results = 'asis'} |
| 111 | 112 |
| 553 ) | 554 ) |
| 554 ) | 555 ) |
| 555 knitr::knit_exit() | 556 knitr::knit_exit() |
| 556 } | 557 } |
| 557 ) | 558 ) |
| 559 | |
| 560 g_default_heatmap_row_scaling <- | |
| 561 params$defaultHeatMapRowScaling | |
| 562 if ( | |
| 563 !is.character(g_default_heatmap_row_scaling) || | |
| 564 !(g_default_heatmap_row_scaling %in% c("row", "none")) | |
| 565 ) { | |
| 566 cat("invalid defaultHeatMapRowScaling (must be 'row' or 'none')") | |
| 567 knitr::knit_exit() | |
| 568 } | |
| 558 | 569 |
| 559 # intensityHeatmapRows: 50 | 570 # intensityHeatmapRows: 50 |
| 560 # TODO Validate >> 0 < 75 | 571 # TODO Validate >> 0 < 75 |
| 561 g_intensity_hm_rows <- params$intensityHeatmapRows | 572 g_intensity_hm_rows <- params$intensityHeatmapRows |
| 562 if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) { | 573 if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) { |
| 2285 max_nchar_rowname <- max(nchar(rownames(x))) | 2296 max_nchar_rowname <- max(nchar(rownames(x))) |
| 2286 max_nchar_colname <- max(nchar(colnames(x))) | 2297 max_nchar_colname <- max(nchar(colnames(x))) |
| 2287 my_limit <- g_intensity_hm_rows | 2298 my_limit <- g_intensity_hm_rows |
| 2288 | 2299 |
| 2289 my_row_cex_scale <- master_cex * 150 / nrow_x | 2300 my_row_cex_scale <- master_cex * 150 / nrow_x |
| 2301 #ACE row cex shrink hack begin | |
| 2302 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) | |
| 2303 #ACE row cex shrink hack end | |
| 2304 | |
| 2290 my_col_cex_scale <- 3.0 | 2305 my_col_cex_scale <- 3.0 |
| 2306 #ACE col cex shrink hack begin | |
| 2307 if (ncol_x > 40) | |
| 2308 my_col_cex_scale <- 3.0 * 40 / ncol_x | |
| 2309 #ACE col cex shrink hack end | |
| 2310 | |
| 2291 my_asterisk_scale <- 0.4 * my_row_cex_scale | 2311 my_asterisk_scale <- 0.4 * my_row_cex_scale |
| 2292 my_row_warp <- 1 | 2312 my_row_warp <- 1 |
| 2293 my_note_warp <- 2 | 2313 my_note_warp <- 2 |
| 2294 my_row_warp <- 1 | 2314 my_row_warp <- 1 |
| 2295 my_row_cex_asterisk <- | 2315 my_row_cex_asterisk <- |
| 2604 g_intensity_hm_rows, # values of 50 and 75 worked well | 2624 g_intensity_hm_rows, # values of 50 and 75 worked well |
| 2605 master_cex = 1.0, # basis for text sizes | 2625 master_cex = 1.0, # basis for text sizes |
| 2606 margins = NULL, # optional margins (bottom, right) | 2626 margins = NULL, # optional margins (bottom, right) |
| 2607 cellnote = NULL, # optional matrix of character; dim = dim(m) | 2627 cellnote = NULL, # optional matrix of character; dim = dim(m) |
| 2608 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right | 2628 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right |
| 2629 row_scaling = # should row-scaling be applied if possible? | |
| 2630 g_default_heatmap_row_scaling, | |
| 2609 ... # passthru to hm2plus or heatmap.2 | 2631 ... # passthru to hm2plus or heatmap.2 |
| 2610 ) { | 2632 ) { |
| 2611 use_heatmap_1 <- FALSE | 2633 use_heatmap_1 <- FALSE |
| 2612 peptide_count <- 0 | 2634 peptide_count <- 0 |
| 2613 # emit the heading for the heatmap | 2635 # emit the heading for the heatmap |
| 2778 } | 2800 } |
| 2779 par(op) | 2801 par(op) |
| 2780 } | 2802 } |
| 2781 | 2803 |
| 2782 # invoke hm_call inner function | 2804 # invoke hm_call inner function |
| 2783 if (sum(rowSums(!is.na(m_hm)) < 2)) | 2805 if (row_scaling != "row" || sum(rowSums(!is.na(m_hm)) < 2)) |
| 2784 hm_call( | 2806 hm_call( |
| 2785 m_hm, | 2807 m_hm, |
| 2786 "none", | 2808 "none", |
| 2787 "log(intensities), unscaled, unimputed, and unnormalized" | 2809 "log(intensities), unimputed, and unnormalized" |
| 2788 ) | 2810 ) |
| 2789 else | 2811 else |
| 2790 hm_call( | 2812 hm_call( |
| 2791 m_hm, | 2813 m_hm, |
| 2792 "row", | 2814 "row", |
| 2800 { | 2822 { |
| 2801 if (nrow(m_hm) > 1) | 2823 if (nrow(m_hm) > 1) |
| 2802 hm_call( | 2824 hm_call( |
| 2803 m_hm, | 2825 m_hm, |
| 2804 "none", | 2826 "none", |
| 2805 paste( | 2827 "log(intensities), zero-imputed, unnormalized" |
| 2806 "log(intensities), unscaled,", | |
| 2807 "zero-imputed, unnormalized" | |
| 2808 ) | |
| 2809 ) | 2828 ) |
| 2810 else | 2829 else |
| 2811 cat("\nThere are too few peptides to produce a heatmap.\n") | 2830 cat("\nThere are too few peptides to produce a heatmap.\n") |
| 2812 }, | 2831 }, |
| 2813 error = function(r) { | 2832 error = function(r) { |
| 3713 | 3732 |
| 3714 imp_smry_pot_peptides_after <- sum(good_rows) | 3733 imp_smry_pot_peptides_after <- sum(good_rows) |
| 3715 imp_smry_rejected_after <- sum(!good_rows) | 3734 imp_smry_rejected_after <- sum(!good_rows) |
| 3716 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) | 3735 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) |
| 3717 | 3736 |
| 3718 # From ?`%in%`, %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 | 3737 # From ?`%in%`: |
| 3719 | 3738 # %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 |
| 3720 sink(stderr()) | |
| 3721 print("`%in%`:") | |
| 3722 print(`%in%`) | |
| 3723 sink() | |
| 3724 | 3739 |
| 3725 stock_in <- | 3740 stock_in <- |
| 3726 names(good_rows) %in% | 3741 names(good_rows) %in% |
| 3727 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) | 3742 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) |
| 3728 if (print_nb_messages) nbe(see_variable(stock_in), "\n") | 3743 if (print_nb_messages) nbe(see_variable(stock_in), "\n") |
| 5092 grouping_factor = | 5107 grouping_factor = |
| 5093 as.factor(grouping_factor$level), # anova_func arg2 | 5108 as.factor(grouping_factor$level), # anova_func arg2 |
| 5094 one_way_f = one_way_two_categories, # anova_func arg3 | 5109 one_way_f = one_way_two_categories, # anova_func arg3 |
| 5095 simplify = TRUE # TRUE is the default for simplify | 5110 simplify = TRUE # TRUE is the default for simplify |
| 5096 ) | 5111 ) |
| 5097 contrast_data_adj_p_values <- p.adjust( | 5112 |
| 5098 p = p_value_data_contrast_ps, | 5113 if (!is.null(p_value_data_contrast_ps)) { |
| 5099 method = "fdr", | 5114 contrast_data_adj_p_values <- |
| 5100 n = length(p_value_data_contrast_ps) # this is the default, length(p) | 5115 p.adjust( |
| 5101 ) | 5116 p = p_value_data_contrast_ps, |
| 5102 # - compute the fold-change | 5117 method = "fdr", |
| 5103 contrast_p_df <- | 5118 n = length(p_value_data_contrast_ps) # this is the default, length(p) |
| 5104 data.frame( | 5119 ) |
| 5105 contrast = contrast, | 5120 |
| 5106 phosphopep = contrast_cast$phosphopep, | 5121 # - compute the fold-change |
| 5107 p_value_raw = p_value_data_contrast_ps, | 5122 contrast_p_df <- |
| 5108 p_value_adj = contrast_data_adj_p_values | 5123 data.frame( |
| 5109 ) | 5124 contrast = contrast, |
| 5110 db_write_table_overwrite <- (contrast < 2) | 5125 phosphopep = contrast_cast$phosphopep, |
| 5111 db_write_table_append <- !db_write_table_overwrite | 5126 p_value_raw = p_value_data_contrast_ps, |
| 5112 RSQLite::dbWriteTable( | 5127 p_value_adj = contrast_data_adj_p_values |
| 5113 conn = db, | 5128 ) |
| 5114 name = "contrast_ppep_p_val", | 5129 db_write_table_overwrite <- (contrast < 2) |
| 5115 value = contrast_p_df, | 5130 db_write_table_append <- !db_write_table_overwrite |
| 5116 append = db_write_table_append | 5131 RSQLite::dbWriteTable( |
| 5117 ) | 5132 conn = db, |
| 5118 # Create UK for insert | 5133 name = "contrast_ppep_p_val", |
| 5119 ddl_exec(db, " | 5134 value = contrast_p_df, |
| 5120 CREATE UNIQUE INDEX IF NOT EXISTS contrast_ppep_p_val__uk__idx | 5135 append = db_write_table_append |
| 5121 ON contrast_ppep_p_val(phosphopep, contrast); | 5136 ) |
| 5122 " | 5137 # Create UK for insert |
| 5123 ) | 5138 ddl_exec(db, " |
| 5139 CREATE UNIQUE INDEX IF NOT EXISTS contrast_ppep_p_val__uk__idx | |
| 5140 ON contrast_ppep_p_val(phosphopep, contrast); | |
| 5141 " | |
| 5142 ) | |
| 5143 } | |
| 5124 } | 5144 } |
| 5125 # Perhaps this could be done more elegantly using unique keys | 5145 # Perhaps this could be done more elegantly using unique keys |
| 5126 # or creating the tables before saving data to them, but this | 5146 # or creating the tables before saving data to them, but this |
| 5127 # is fast and, if the database exists on disk rather than in | 5147 # is fast and, if the database exists on disk rather than in |
| 5128 # memory, it doesn't stress memory. | 5148 # memory, it doesn't stress memory. |
| 6093 hm_main_title | 6113 hm_main_title |
| 6094 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", | 6114 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", |
| 6095 suppress_row_dendrogram = FALSE, | 6115 suppress_row_dendrogram = FALSE, |
| 6096 master_cex = 0.35, | 6116 master_cex = 0.35, |
| 6097 sepcolor = "black", | 6117 sepcolor = "black", |
| 6098 colsep = sample_colsep | 6118 colsep = sample_colsep, |
| 6119 row_scaling = "none" | |
| 6099 ) | 6120 ) |
| 6100 if (number_of_peptides_found > 1) { | 6121 if (number_of_peptides_found > 1) { |
| 6101 | 6122 |
| 6102 tryCatch( | 6123 tryCatch( |
| 6103 { | 6124 { |
