Mercurial > repos > proteore > proteore_topgo
changeset 11:ddcc0347c54a draft
planemo upload commit 76a36ad5001b9d90c680ff389c7ab7187a790275-dirty
| author | proteore |
|---|---|
| date | Tue, 16 Oct 2018 10:28:50 -0400 |
| parents | 511b060e9890 |
| children | 3d6b76f301c2 |
| files | topGO.xml topGO_enrichment.R |
| diffstat | 2 files changed, 90 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/topGO.xml Mon Oct 15 11:30:04 2018 -0400 +++ b/topGO.xml Tue Oct 16 10:28:50 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="topGO" name="Enrichment analysis for Gene Ontology" version="2018.10.12"> +<tool id="topGO" name="Enrichment analysis for Gene Ontology" version="2018.10.16"> <description>(Human, Mouse, Rat) (topGO)</description> <requirements> <requirement type="package" version="3.4.1">R</requirement> @@ -36,6 +36,16 @@ --barplotoutput='$barplot' --dotplotoutput='$dotplot' --geneuniverse='$geneuniverse' + --background="$background_genes.background" + + #if $background_genes.background == "true" + --background_genes="$background_genes.inputtype.genelist" + --background_input_type="$background_genes.inputtype.filetype" + #if $background_genes.inputtype.filetype == "file" + --background_header="$background_genes.inputtype.header" + --background_column="$background_genes.inputtype.column" + #end if + #end if ]]></command> @@ -63,6 +73,35 @@ <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your file have a header?" /> </when> </conditional> + <conditional name="background_genes"> + <param name="background" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Would you like to define your own background IDs"/> + <when value="true"> + <conditional name="inputtype"> + <param name="filetype" type="select" label="Select your type of input file" help="The identifiers must be Ensembl gene IDs (e.g : ENSG00000139618). If it is not the case, please use the ID Mapping tool."> + <option value="file" selected="true">Input file containing your identifiers</option> + <option value="copy_paste">Copy/paste your list of IDs</option> + </param> + <when value="copy_paste"> + <param name="genelist" type="text" label="Enter a list of identifiers"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> + </when> + <when value="file"> + <param name="genelist" type="data" format="txt,tabular" label="Choose an input file" help="This file must imperatively have 1 column filled with IDs consistent with the database that will be used. Please use the MappingIDs component if this is not the case."/> + <param name="column" type="text" label="Please specify the column where your Ensembl IDs are (e.g : Enter 'c1' for column n°1..)" value="c1"/> + <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your file have a header?" /> + </when> + </conditional> + </when> + <when value="false"/> + </conditional> <param name="geneuniverse" type="select" label="Select a specie"> <!--option value="org.At.tair.db" >Arabidopsis</option--> <option value="org.Ce.eg.db" >Worm (C. elegans)</option>
--- a/topGO_enrichment.R Mon Oct 15 11:30:04 2018 -0400 +++ b/topGO_enrichment.R Tue Oct 16 10:28:50 2018 -0400 @@ -47,7 +47,7 @@ } read_file <- function(path,header){ - file <- try(read.table(path,header=header, sep="\t",stringsAsFactors = FALSE, quote=""),silent=TRUE) + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\""),silent=TRUE) if (inherits(file,"try-error")){ stop("File not found !") }else{ @@ -55,6 +55,14 @@ } } +get_list_from_cp <-function(list){ + list = gsub(";"," ",list) + list = strsplit(list, "[ \t\n]+")[[1]] + list = list[list != ""] #remove empty entry + list = gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2") + return(list) +} + check_ens_ids <- function(vector) { ens_pattern = "^(ENS[A-Z]+[0-9]{11}|[A-Z]{3}[0-9]{3}[A-Za-z](-[A-Za-z])?|CG[0-9]+|[A-Z0-9]+\\.[0-9]+|YM[A-Z][0-9]{3}[a-z][0-9])$" return(grepl(ens_pattern,vector)) @@ -238,23 +246,26 @@ # Launch enrichment analysis and return result data from the analysis or the null # object if the enrichment could not be done. -goEnrichment = function(geneuniverse,sample,onto){ +goEnrichment = function(geneuniverse,sample,background_sample,onto){ - # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their - # associated ensembl ids according to the org package - xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl") - allGenes = unique(unlist(xx)) - # check if the genes given by the user can be found in the org package (gene - # universe), that is in - # allGenes + if (is.null(background_sample)){ + xx = annFUN.org(onto,mapping=geneuniverse,ID="ensembl") # get all the GO terms of the corresponding ontology (BP/CC/MF) and all their associated ensembl ids according to the org package + allGenes = unique(unlist(xx)) # check if the genes given by the user can be found in the org package (gene universe), that is in allGenes + } else { + allGenes = background_sample + } + if (length(intersect(sample,allGenes))==0){ - print("None of the input ids can be found in the org package data, enrichment analysis cannot be realized. \n The inputs ids probably have no associated GO terms.") return(c(NULL,NULL)) - } geneList = factor(as.integer(allGenes %in% sample)) + if (levels(geneList) == "1" ){ + stop("All background genes are found in tested genes dataset, enrichment analysis can't be done") + } else if (levels(geneList)== "0"){ + stop("None of the background genes are found in tested genes dataset, enrichment analysis can't be done") + } names(geneList) <- allGenes #topGO enrichment @@ -292,13 +303,20 @@ column = as.numeric(gsub("c","",args$column)) geneuniverse = args$geneuniverse header = str2bool(args$header) +background = str2bool(args$background) +if (background){ + background_genes = args$background_genes + background_input_type = args$background_input_type + background_header = str2bool(args$background_header) + background_column = as.numeric(gsub("c","",args$background_column)) +} #get input if (input_type=="copy_paste"){ - sample <- unlist(strsplit(input,",")) + sample <- get_list_from_cp(input) } else if (input_type=="file"){ tab=read_file(input,header) - sample = tab[,column] + sample = trimws(unlist(strsplit(tab[,column],";"))) } #check of ENS ids @@ -307,8 +325,26 @@ stop() } +#get input if background genes +if (background){ + if (background_input_type=="copy_paste"){ + background_sample <- get_list_from_cp(background_genes) + } else if (background_input_type=="file"){ + background_tab=read_file(background_genes,background_header) + background_sample = unique(trimws(unlist(strsplit(background_tab[,background_column],";")))) + } +} else { + background_sample=NULL +} + +#check of ENS ids +if (! any(check_ens_ids(background_sample))){ + print("no ensembl gene ids found in your background ids list, please check your IDs in input or the selected column of your input file") + stop() +} + # Launch enrichment analysis -allresult = suppressMessages(goEnrichment(geneuniverse,sample,onto)) +allresult = suppressMessages(goEnrichment(geneuniverse,sample,background_sample,onto)) result = allresult[1][[1]] myGOdata = allresult[2][[1]] if (!is.null(result)){
