Mercurial > repos > proteore > proteore_prot_features
changeset 3:7746af0f8209 draft
planemo upload commit 13e1789dd0070e7df7651c34cc1b667c793b8c15-dirty
| author | proteore |
|---|---|
| date | Fri, 07 Dec 2018 05:09:57 -0500 |
| parents | 9525c7c2d5d6 |
| children | 759850de6ed2 |
| files | prot_features.xml protein_features.R |
| diffstat | 2 files changed, 11 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/prot_features.xml Thu Dec 06 05:25:41 2018 -0500 +++ b/prot_features.xml Fri Dec 07 05:09:57 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="prot_features" name="Add human protein features" version="2018.12.04"> +<tool id="prot_features" name="Add human protein features" version="2018.12.07"> <description>(neXtProt) </description> <requirements>
--- a/protein_features.R Thu Dec 06 05:25:41 2018 -0500 +++ b/protein_features.R Fri Dec 07 05:09:57 2018 -0500 @@ -21,6 +21,7 @@ get_list_from_cp <-function(list){ list = strsplit(list, "[ \t\n]+")[[1]] + list = gsub("NA","",list) list = list[list != ""] #remove empty entry list = gsub("-.+", "", list) #Remove isoform accession number (e.g. "-2") return(list) @@ -142,7 +143,7 @@ inputtype = args$inputtype if (inputtype == "copy_paste") { input = get_list_from_cp(args$input) - input = input[which(!is.na(input[input!=""]))] + file = data.frame(input,stringsAsFactors = F) ncol=1 } else if (inputtype == "file") { filename = args$input @@ -157,7 +158,6 @@ header = str2bool(args$header) file = read_file(filename, header) # Get file content if (any(grep(";",file[,ncol]))) {file = one_id_one_line(file,ncol)} - input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) # Extract Protein IDs list if (args$type == "NextprotID" && ! "NextprotID" %in% colnames(file)) { colnames(file)[ncol] <- "NextprotID" } else if (args$type == "NextprotID" && "NextprotID" %in% colnames(file) && match("NextprotID",colnames(file))!=ncol ) { colnames(file)[match("NextprotID",colnames(file))] <- "old_NextprotID" @@ -178,31 +178,21 @@ # Change the sample ids if they are Uniprot_AC ids to be able to match them with # Nextprot data if (id_type=="Uniprot_AC"){ - NextprotID = gsub("^","NX_",input) - if (inputtype == "file" && "NextprotID" %in% colnames(file)){ - colnames(file)[match("NextprotID",colnames(file))] <- "old_NextprotID" - file = cbind(file,NextprotID) - } else { - file = data.frame(cbind(input,NextprotID)) - colnames(file)[1]="Uniprot-AC" - } - } else if (id_type=="NextprotID") { - if (inputtype == "file") { - NextprotID = file$NextprotID - } else { - NextprotID = input - file=data.frame(NextprotID) - } + NextprotID = gsub("^NX_$","",gsub("^","NX_",file[,ncol])) + file = cbind(file,NextprotID) + if (inputtype=="copy_paste") {colnames(file)[1]="Uniprot-AC"} + ncol=ncol(file) } + NextprotID = file[,ncol] #Select user input protein ids in nextprot - NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) - if ((length(NextprotID[NextprotID %in% nextprot[,1]]))==0){ + #NextprotID = unique(NextprotID[which(!is.na(NextprotID[NextprotID!=""]))]) + if (all(!NextprotID %in% nextprot[,1])){ write.table("None of the input ids can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) } else { res <- get_nextprot_info(nextprot,NextprotID,pc_features,localization,diseases_info) res = res[!duplicated(res$NextprotID),] - output_content = merge(file, res,by="NextprotID",incomparables = NA,all.x=T) + output_content = merge(file, res,by.x=ncol,by.y="NextprotID",incomparables = NA,all.x=T) output_content = order_columns(output_content,ncol,id_type,file) output_content <- as.data.frame(apply(output_content, c(1,2), function(x) gsub("^$|^ $", NA, x))) #convert "" et " " to NA write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE)
