Mercurial > repos > iuc > multigsea
comparison multiGSEA.R @ 0:d1925dbabe98 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 5c1b8a2b105a80e236f88e71a743147d79925ac4
| author | iuc |
|---|---|
| date | Wed, 07 Jun 2023 19:48:35 +0000 |
| parents | |
| children | c06f093f3758 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d1925dbabe98 |
|---|---|
| 1 library(multiGSEA, | |
| 2 quietly = TRUE, | |
| 3 warn.conflicts = FALSE) | |
| 4 library(argparse, quietly = TRUE, warn.conflicts = FALSE) | |
| 5 | |
| 6 ################################################################################ | |
| 7 ### Input Processing | |
| 8 ################################################################################ | |
| 9 | |
| 10 | |
| 11 # Collect arguments from command line | |
| 12 parser <- ArgumentParser(description = "multiGSEA R script") | |
| 13 | |
| 14 parser$add_argument("--transcriptomics", required = FALSE, | |
| 15 help = "Transcriptomics data") | |
| 16 parser$add_argument( | |
| 17 "--transcriptome_ids", | |
| 18 required = FALSE, | |
| 19 help = "Transcriptomics ids", | |
| 20 default = "SYMBOL" | |
| 21 ) | |
| 22 parser$add_argument("--proteomics", required = FALSE, | |
| 23 help = "Proteomics data") | |
| 24 parser$add_argument( | |
| 25 "--proteome_ids", | |
| 26 required = FALSE, | |
| 27 help = "Proteomics ids", | |
| 28 default = "SYMBOL" | |
| 29 ) | |
| 30 parser$add_argument("--metabolomics", required = FALSE, | |
| 31 help = "Metabolomics data") | |
| 32 parser$add_argument( | |
| 33 "--metabolome_ids", | |
| 34 required = FALSE, | |
| 35 help = "Metabolomics ids", | |
| 36 default = "HMDB" | |
| 37 ) | |
| 38 parser$add_argument("--organism", required = TRUE, | |
| 39 help = "Organism") | |
| 40 parser$add_argument("--combine_pvalues", required = TRUE, | |
| 41 help = "Combine p-values method") | |
| 42 parser$add_argument("--padj_method", required = TRUE, | |
| 43 help = "P-adjustment method") | |
| 44 parser$add_argument("--databases", | |
| 45 required = TRUE, | |
| 46 help = "Pathway databases") | |
| 47 | |
| 48 args <- parser$parse_args() | |
| 49 | |
| 50 ## ----Load library------------------------------------------------------------- | |
| 51 | |
| 52 organism_mapping <- c( | |
| 53 "hsapiens" = "org.Hs.eg.db", | |
| 54 "mmusculus" = "org.Mm.eg.db", | |
| 55 "rnorvegicus" = "org.Rn.eg.db", | |
| 56 "cfamiliaris" = "org.Cf.eg.db", | |
| 57 "btaurus" = "org.Bt.eg.db", | |
| 58 "sscrofa" = "org.Ss.eg.db", | |
| 59 "ggallus" = "org.Gg.eg.db", | |
| 60 "drerio" = "org.Xl.eg.db", | |
| 61 "xlaevis" = "org.Dr.eg.db", | |
| 62 "dmelanogaster" = "org.Dm.eg.db", | |
| 63 "celegans" = "org.Ce.eg.db" | |
| 64 ) | |
| 65 | |
| 66 library(organism_mapping[args$organism], character.only = TRUE) | |
| 67 | |
| 68 | |
| 69 ## ----Load omics data---------------------------------------------------------- | |
| 70 | |
| 71 layer <- c() | |
| 72 | |
| 73 if (!is.null(args$transcriptomics)) { | |
| 74 transcriptome <- read.csv( | |
| 75 args$transcriptomics, | |
| 76 header = TRUE, | |
| 77 sep = "\t", | |
| 78 dec = "." | |
| 79 ) | |
| 80 layer <- append(layer, "transcriptome") | |
| 81 } | |
| 82 | |
| 83 if (!is.null(args$proteomics)) { | |
| 84 proteome <- read.csv(args$proteomics, | |
| 85 header = TRUE, | |
| 86 sep = "\t", | |
| 87 dec = ".") | |
| 88 layer <- append(layer, "proteome") | |
| 89 } | |
| 90 | |
| 91 if (!is.null(args$metabolomics)) { | |
| 92 metabolome <- read.csv(args$metabolomics, | |
| 93 header = TRUE, | |
| 94 sep = "\t", | |
| 95 dec = ".") | |
| 96 layer <- append(layer, "metabolome") | |
| 97 } | |
| 98 | |
| 99 ## ----rank_features------------------------------------------------------------ | |
| 100 | |
| 101 # create data structure | |
| 102 omics_data <- initOmicsDataStructure(layer) | |
| 103 | |
| 104 ## add transcriptome layer | |
| 105 if (!is.null(args$transcriptomics)) { | |
| 106 omics_data$transcriptome <- rankFeatures(transcriptome$logFC, | |
| 107 transcriptome$pValue) | |
| 108 names(omics_data$transcriptome) <- transcriptome$Symbol | |
| 109 } | |
| 110 | |
| 111 ## add proteome layer | |
| 112 if (!is.null(args$proteomics)) { | |
| 113 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) | |
| 114 names(omics_data$proteome) <- proteome$Symbol | |
| 115 } | |
| 116 | |
| 117 ## add metabolome layer | |
| 118 ## HMDB features have to be updated to the new HMDB format | |
| 119 if (!is.null(args$metabolomics)) { | |
| 120 omics_data$metabolome <- | |
| 121 rankFeatures(metabolome$logFC, metabolome$pValue) | |
| 122 names(omics_data$metabolome) <- metabolome$HMDB | |
| 123 names(omics_data$metabolome) <- gsub("HMDB", "HMDB00", | |
| 124 names(omics_data$metabolome)) | |
| 125 } | |
| 126 | |
| 127 | |
| 128 ## remove NA's and sort feature ranks | |
| 129 omics_data <- lapply(omics_data, function(vec) { | |
| 130 sort(vec[!is.na(vec)]) | |
| 131 }) | |
| 132 | |
| 133 ## ----Pathway definitions------------------------------------------------------ | |
| 134 | |
| 135 pathways <- | |
| 136 getMultiOmicsFeatures( | |
| 137 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), | |
| 138 layer = layer, | |
| 139 returnTranscriptome = args$transcriptome_ids, | |
| 140 returnProteome = args$proteome_ids, | |
| 141 returnMetabolome = args$metabolome_ids, | |
| 142 organism = args$organism, | |
| 143 useLocal = FALSE | |
| 144 ) | |
| 145 | |
| 146 ## ----calculate enrichment----------------------------------------------------- | |
| 147 | |
| 148 enrichment_scores <- | |
| 149 multiGSEA(pathways, omics_data) | |
| 150 | |
| 151 ## ----combine_pvalues---------------------------------------------------------- | |
| 152 | |
| 153 df <- extractPvalues(enrichmentScores = enrichment_scores, | |
| 154 pathwayNames = names(pathways[[1]])) | |
| 155 | |
| 156 df$combined_pval <- | |
| 157 combinePvalues(df, method = args$combine_pvalues) | |
| 158 df$combined_padj <- | |
| 159 p.adjust(df$combined_pval, method = args$padj_method) | |
| 160 | |
| 161 df <- cbind(data.frame(pathway = names(pathways[[1]])), df) | |
| 162 | |
| 163 ## ----Write output------------------------------------------------------------- | |
| 164 | |
| 165 write.table( | |
| 166 df, | |
| 167 file = "results.tsv", | |
| 168 quote = FALSE, | |
| 169 sep = "\t", | |
| 170 col.names = TRUE, | |
| 171 row.names = FALSE | |
| 172 ) |
