comparison data_manager/customProDB_annotation.R @ 3:af0a098e15bd draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/data_manager_customProDB commit 141369f97aa2804d2bbfd9ed620ea2a5574994c2-dirty
author galaxyp
date Wed, 27 Jan 2016 16:46:54 -0500
parents 20a1b026b798
children b42974daa659
comparison
equal deleted inserted replaced
2:20a1b026b798 3:af0a098e15bd
15 15
16 16
17 option_list <- list() 17 option_list <- list()
18 option_list$dbkey <- make_option('--dbkey', type='character') 18 option_list$dbkey <- make_option('--dbkey', type='character')
19 option_list$outputFile <- make_option('--outputFile', type='character') 19 option_list$outputFile <- make_option('--outputFile', type='character')
20 option_list$dbkey_description <- make_option('--dbkey_description', type='character')
20 21
21 opt <- parse_args(OptionParser(option_list=option_list)) 22 opt <- parse_args(OptionParser(option_list=option_list))
22 23
23 24
24 customProDB_annotation <- function( 25 customProDB_annotation <- function(
25 dbkey = GalaxyCharacterParam(required=TRUE), 26 dbkey = GalaxyCharacterParam(required=TRUE),
27 dbkey_description = GalaxyCharacterParam(required=FALSE),
26 outputFile = GalaxyOutput("output","json")) 28 outputFile = GalaxyOutput("output","json"))
27 { 29 {
28 if (!file.exists(outputFile)) 30 if (!file.exists(outputFile))
29 { 31 {
30 gstop("json params file does not exist") 32 gstop("json params file does not exist")
31 } 33 }
32 34
35 if (length(dbkey_description) < 1)
36 {
37 dbkey_description = dbkey
38 }
39
33 suppressPackageStartupMessages(library(rjson)) 40 suppressPackageStartupMessages(library(rjson))
34 params = fromJSON(file=outputFile) 41 params = fromJSON(file=outputFile)
35 target_directory = params$output_data[[1]]$extra_files_path 42 target_directory = params$output_data[[1]]$extra_files_path
36 data_description = params$param_dict$advanced$data_description
37 data_id = params$param_dict$advanced$data_id
38 dir.create(target_directory) 43 dir.create(target_directory)
39 44
40 tryCatch( 45 tryCatch(
41 { 46 {
42 #file.remove(outputFile) 47 file.remove(outputFile)
43 }, error=function(err) 48 }, error=function(err)
44 { 49 {
45 gstop("failed to remove json params file after reading") 50 gstop("failed to remove json params file after reading")
46 }) 51 })
47 52
48 ucscTableCodingFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome", sep="") 53 ucscTableCodingFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome", sep="")
49 ucscTableProteinFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit&hgta_track=refGene&hgta_table=refGene", sep="") 54 ucscTableProteinFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit&hgta_track=refGene&hgta_table=refGene", sep="")
50 codingFastaFilepath = paste(target_directory, "/", dbkey, ".cds.fa", sep="") 55 codingFastaFilepath = paste(target_directory, "/", dbkey, ".cds.fa", sep="")
51 proteinFastaFilepath = paste(target_directory, "/", dbkey, ".protein.fa", sep="") 56 proteinFastaFilepath = paste(target_directory, "/", dbkey, ".protein.fa", sep="")
57
52 suppressPackageStartupMessages(library(customProDB)) 58 suppressPackageStartupMessages(library(customProDB))
53 options(timeout=3600) 59 options(timeout=3600)
60
54 cat(paste("Downloading coding FASTA from:", ucscTableCodingFastaURL, "\n")) 61 cat(paste("Downloading coding FASTA from:", ucscTableCodingFastaURL, "\n"))
55 download.file(ucscTableCodingFastaURL, codingFastaFilepath, quiet=T, mode='wb') 62 download.file(ucscTableCodingFastaURL, codingFastaFilepath, quiet=T, mode='wb')
63
56 cat(paste("Downloading protein FASTA from:", ucscTableProteinFastaURL, "\n")) 64 cat(paste("Downloading protein FASTA from:", ucscTableProteinFastaURL, "\n"))
57 download.file(ucscTableProteinFastaURL, proteinFastaFilepath, quiet=T, mode='wb') 65 download.file(ucscTableProteinFastaURL, proteinFastaFilepath, quiet=T, mode='wb')
66
67 cat(paste("Preparing Refseq annotation files\n"))
58 customProDB::PrepareAnnotationRefseq(genome=dbkey, CDSfasta=codingFastaFilepath, pepfasta=proteinFastaFilepath, annotation_path=target_directory) 68 customProDB::PrepareAnnotationRefseq(genome=dbkey, CDSfasta=codingFastaFilepath, pepfasta=proteinFastaFilepath, annotation_path=target_directory)
59 69
60 outputPath = paste("customProDB/", dbkey, sep="") 70 outputPath = paste(dbkey, "/customProDB", sep="")
61 output = list(data_tables = list()) 71 output = list(data_tables = list())
62 output[["data_tables"]][["customProDB"]]=c(path=outputPath, name=dbkey, value=dbkey, dbkey=dbkey) 72 output[["data_tables"]][["customProDB"]]=c(path=outputPath, name=dbkey_description, dbkey=dbkey)
63 write(toJSON(output), file=outputFile) 73 write(toJSON(output), file=outputFile)
64 } 74 }
65 75
66 76
67 params <- list() 77 params <- list()