Mercurial > repos > iuc > multigsea
comparison multiGSEA.R @ 1:c06f093f3758 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit eb12f339f6f2e72792e6db36848222227d43d008
| author | iuc |
|---|---|
| date | Fri, 28 Feb 2025 10:25:26 +0000 |
| parents | d1925dbabe98 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d1925dbabe98 | 1:c06f093f3758 |
|---|---|
| 1 library(multiGSEA, | 1 library(multiGSEA, |
| 2 quietly = TRUE, | 2 quietly = TRUE, |
| 3 warn.conflicts = FALSE) | 3 warn.conflicts = FALSE |
| 4 ) | |
| 4 library(argparse, quietly = TRUE, warn.conflicts = FALSE) | 5 library(argparse, quietly = TRUE, warn.conflicts = FALSE) |
| 5 | 6 |
| 6 ################################################################################ | 7 ################################################################################ |
| 7 ### Input Processing | 8 ### Input Processing |
| 8 ################################################################################ | 9 ################################################################################ |
| 9 | 10 |
| 10 | 11 |
| 11 # Collect arguments from command line | 12 # Collect arguments from command line |
| 12 parser <- ArgumentParser(description = "multiGSEA R script") | 13 parser <- ArgumentParser(description = "multiGSEA R script") |
| 13 | 14 |
| 14 parser$add_argument("--transcriptomics", required = FALSE, | 15 parser$add_argument("--transcriptomics", |
| 15 help = "Transcriptomics data") | 16 required = FALSE, |
| 17 help = "Transcriptomics data" | |
| 18 ) | |
| 16 parser$add_argument( | 19 parser$add_argument( |
| 17 "--transcriptome_ids", | 20 "--transcriptome_ids", |
| 18 required = FALSE, | 21 required = FALSE, |
| 19 help = "Transcriptomics ids", | 22 help = "Transcriptomics ids", |
| 20 default = "SYMBOL" | 23 default = "SYMBOL" |
| 21 ) | 24 ) |
| 22 parser$add_argument("--proteomics", required = FALSE, | 25 parser$add_argument("--proteomics", |
| 23 help = "Proteomics data") | 26 required = FALSE, |
| 27 help = "Proteomics data" | |
| 28 ) | |
| 24 parser$add_argument( | 29 parser$add_argument( |
| 25 "--proteome_ids", | 30 "--proteome_ids", |
| 26 required = FALSE, | 31 required = FALSE, |
| 27 help = "Proteomics ids", | 32 help = "Proteomics ids", |
| 28 default = "SYMBOL" | 33 default = "SYMBOL" |
| 29 ) | 34 ) |
| 30 parser$add_argument("--metabolomics", required = FALSE, | 35 parser$add_argument("--metabolomics", |
| 31 help = "Metabolomics data") | 36 required = FALSE, |
| 37 help = "Metabolomics data" | |
| 38 ) | |
| 32 parser$add_argument( | 39 parser$add_argument( |
| 33 "--metabolome_ids", | 40 "--metabolome_ids", |
| 34 required = FALSE, | 41 required = FALSE, |
| 35 help = "Metabolomics ids", | 42 help = "Metabolomics ids", |
| 36 default = "HMDB" | 43 default = "HMDB" |
| 37 ) | 44 ) |
| 38 parser$add_argument("--organism", required = TRUE, | 45 parser$add_argument("--organism", |
| 39 help = "Organism") | 46 required = TRUE, |
| 40 parser$add_argument("--combine_pvalues", required = TRUE, | 47 help = "Organism" |
| 41 help = "Combine p-values method") | 48 ) |
| 42 parser$add_argument("--padj_method", required = TRUE, | 49 parser$add_argument("--combine_pvalues", |
| 43 help = "P-adjustment method") | 50 required = TRUE, |
| 51 help = "Combine p-values method" | |
| 52 ) | |
| 53 parser$add_argument("--padj_method", | |
| 54 required = TRUE, | |
| 55 help = "P-adjustment method" | |
| 56 ) | |
| 44 parser$add_argument("--databases", | 57 parser$add_argument("--databases", |
| 45 required = TRUE, | 58 required = TRUE, |
| 46 help = "Pathway databases") | 59 help = "Pathway databases" |
| 60 ) | |
| 47 | 61 |
| 48 args <- parser$parse_args() | 62 args <- parser$parse_args() |
| 49 | 63 |
| 50 ## ----Load library------------------------------------------------------------- | 64 ## ----Load library------------------------------------------------------------- |
| 51 | 65 |
| 52 organism_mapping <- c( | 66 organism_mapping <- c( |
| 53 "hsapiens" = "org.Hs.eg.db", | 67 "hsapiens" = "org.Hs.eg.db", |
| 54 "mmusculus" = "org.Mm.eg.db", | 68 "mmusculus" = "org.Mm.eg.db", |
| 55 "rnorvegicus" = "org.Rn.eg.db", | 69 "rnorvegicus" = "org.Rn.eg.db", |
| 56 "cfamiliaris" = "org.Cf.eg.db", | 70 "cfamiliaris" = "org.Cf.eg.db", |
| 57 "btaurus" = "org.Bt.eg.db", | 71 "btaurus" = "org.Bt.eg.db", |
| 58 "sscrofa" = "org.Ss.eg.db", | 72 "sscrofa" = "org.Ss.eg.db", |
| 59 "ggallus" = "org.Gg.eg.db", | 73 "ggallus" = "org.Gg.eg.db", |
| 60 "drerio" = "org.Xl.eg.db", | 74 "drerio" = "org.Xl.eg.db", |
| 61 "xlaevis" = "org.Dr.eg.db", | 75 "xlaevis" = "org.Dr.eg.db", |
| 62 "dmelanogaster" = "org.Dm.eg.db", | 76 "dmelanogaster" = "org.Dm.eg.db", |
| 63 "celegans" = "org.Ce.eg.db" | 77 "celegans" = "org.Ce.eg.db" |
| 64 ) | 78 ) |
| 65 | 79 |
| 66 library(organism_mapping[args$organism], character.only = TRUE) | 80 library(organism_mapping[args$organism], character.only = TRUE) |
| 67 | 81 |
| 68 | 82 |
| 69 ## ----Load omics data---------------------------------------------------------- | 83 ## ----Load omics data---------------------------------------------------------- |
| 70 | 84 |
| 71 layer <- c() | 85 layer <- c() |
| 72 | 86 |
| 73 if (!is.null(args$transcriptomics)) { | 87 if (!is.null(args$transcriptomics)) { |
| 74 transcriptome <- read.csv( | 88 transcriptome <- read.csv( |
| 75 args$transcriptomics, | 89 args$transcriptomics, |
| 76 header = TRUE, | 90 header = TRUE, |
| 77 sep = "\t", | 91 sep = "\t", |
| 78 dec = "." | 92 dec = "." |
| 79 ) | 93 ) |
| 80 layer <- append(layer, "transcriptome") | 94 layer <- append(layer, "transcriptome") |
| 81 } | 95 } |
| 82 | 96 |
| 83 if (!is.null(args$proteomics)) { | 97 if (!is.null(args$proteomics)) { |
| 84 proteome <- read.csv(args$proteomics, | 98 proteome <- read.csv(args$proteomics, |
| 85 header = TRUE, | 99 header = TRUE, |
| 86 sep = "\t", | 100 sep = "\t", |
| 87 dec = ".") | 101 dec = "." |
| 88 layer <- append(layer, "proteome") | 102 ) |
| 103 layer <- append(layer, "proteome") | |
| 89 } | 104 } |
| 90 | 105 |
| 91 if (!is.null(args$metabolomics)) { | 106 if (!is.null(args$metabolomics)) { |
| 92 metabolome <- read.csv(args$metabolomics, | 107 metabolome <- read.csv(args$metabolomics, |
| 93 header = TRUE, | 108 header = TRUE, |
| 94 sep = "\t", | 109 sep = "\t", |
| 95 dec = ".") | 110 dec = "." |
| 96 layer <- append(layer, "metabolome") | 111 ) |
| 112 layer <- append(layer, "metabolome") | |
| 97 } | 113 } |
| 98 | 114 |
| 99 ## ----rank_features------------------------------------------------------------ | 115 ## ----rank_features------------------------------------------------------------ |
| 100 | 116 |
| 101 # create data structure | 117 # create data structure |
| 102 omics_data <- initOmicsDataStructure(layer) | 118 omics_data <- initOmicsDataStructure(layer) |
| 103 | 119 |
| 104 ## add transcriptome layer | 120 ## add transcriptome layer |
| 105 if (!is.null(args$transcriptomics)) { | 121 if (!is.null(args$transcriptomics)) { |
| 106 omics_data$transcriptome <- rankFeatures(transcriptome$logFC, | 122 omics_data$transcriptome <- rankFeatures( |
| 107 transcriptome$pValue) | 123 transcriptome$logFC, |
| 108 names(omics_data$transcriptome) <- transcriptome$Symbol | 124 transcriptome$pValue |
| 125 ) | |
| 126 names(omics_data$transcriptome) <- transcriptome$Symbol | |
| 109 } | 127 } |
| 110 | 128 |
| 111 ## add proteome layer | 129 ## add proteome layer |
| 112 if (!is.null(args$proteomics)) { | 130 if (!is.null(args$proteomics)) { |
| 113 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) | 131 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) |
| 114 names(omics_data$proteome) <- proteome$Symbol | 132 names(omics_data$proteome) <- proteome$Symbol |
| 115 } | 133 } |
| 116 | 134 |
| 117 ## add metabolome layer | 135 ## add metabolome layer |
| 118 ## HMDB features have to be updated to the new HMDB format | 136 ## HMDB features have to be updated to the new HMDB format |
| 119 if (!is.null(args$metabolomics)) { | 137 if (!is.null(args$metabolomics)) { |
| 120 omics_data$metabolome <- | 138 omics_data$metabolome <- |
| 121 rankFeatures(metabolome$logFC, metabolome$pValue) | 139 rankFeatures(metabolome$logFC, metabolome$pValue) |
| 122 names(omics_data$metabolome) <- metabolome$HMDB | 140 names(omics_data$metabolome) <- metabolome$HMDB |
| 123 names(omics_data$metabolome) <- gsub("HMDB", "HMDB00", | 141 names(omics_data$metabolome) <- gsub( |
| 124 names(omics_data$metabolome)) | 142 "HMDB", "HMDB00", |
| 143 names(omics_data$metabolome) | |
| 144 ) | |
| 125 } | 145 } |
| 126 | 146 |
| 127 | 147 |
| 128 ## remove NA's and sort feature ranks | 148 ## remove NA's and sort feature ranks |
| 129 omics_data <- lapply(omics_data, function(vec) { | 149 omics_data <- lapply(omics_data, function(vec) { |
| 130 sort(vec[!is.na(vec)]) | 150 sort(vec[!is.na(vec)]) |
| 131 }) | 151 }) |
| 132 | 152 |
| 133 ## ----Pathway definitions------------------------------------------------------ | 153 ## ----Pathway definitions------------------------------------------------------ |
| 134 | 154 |
| 135 pathways <- | 155 pathways <- |
| 136 getMultiOmicsFeatures( | 156 getMultiOmicsFeatures( |
| 137 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), | 157 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), |
| 138 layer = layer, | 158 layer = layer, |
| 139 returnTranscriptome = args$transcriptome_ids, | 159 returnTranscriptome = args$transcriptome_ids, |
| 140 returnProteome = args$proteome_ids, | 160 returnProteome = args$proteome_ids, |
| 141 returnMetabolome = args$metabolome_ids, | 161 returnMetabolome = args$metabolome_ids, |
| 142 organism = args$organism, | 162 organism = args$organism, |
| 143 useLocal = FALSE | 163 useLocal = FALSE |
| 144 ) | 164 ) |
| 145 | 165 |
| 146 ## ----calculate enrichment----------------------------------------------------- | 166 ## ----calculate enrichment----------------------------------------------------- |
| 147 | 167 |
| 148 enrichment_scores <- | 168 enrichment_scores <- |
| 149 multiGSEA(pathways, omics_data) | 169 multiGSEA(pathways, omics_data) |
| 150 | 170 |
| 151 ## ----combine_pvalues---------------------------------------------------------- | 171 ## ----combine_pvalues---------------------------------------------------------- |
| 152 | 172 |
| 153 df <- extractPvalues(enrichmentScores = enrichment_scores, | 173 df <- extractPvalues( |
| 154 pathwayNames = names(pathways[[1]])) | 174 enrichmentScores = enrichment_scores, |
| 175 pathwayNames = names(pathways[[1]]) | |
| 176 ) | |
| 155 | 177 |
| 156 df$combined_pval <- | 178 df$combined_pval <- |
| 157 combinePvalues(df, method = args$combine_pvalues) | 179 combinePvalues(df, method = args$combine_pvalues) |
| 158 df$combined_padj <- | 180 df$combined_padj <- |
| 159 p.adjust(df$combined_pval, method = args$padj_method) | 181 p.adjust(df$combined_pval, method = args$padj_method) |
| 160 | 182 |
| 161 df <- cbind(data.frame(pathway = names(pathways[[1]])), df) | 183 df <- cbind(data.frame(pathway = names(pathways[[1]])), df) |
| 162 | 184 |
| 163 ## ----Write output------------------------------------------------------------- | 185 ## ----Write output------------------------------------------------------------- |
| 164 | 186 |
| 165 write.table( | 187 write.table( |
| 166 df, | 188 df, |
| 167 file = "results.tsv", | 189 file = "results.tsv", |
| 168 quote = FALSE, | 190 quote = FALSE, |
| 169 sep = "\t", | 191 sep = "\t", |
| 170 col.names = TRUE, | 192 col.names = TRUE, |
| 171 row.names = FALSE | 193 row.names = FALSE |
| 172 ) | 194 ) |
