Mercurial > repos > galaxyp > custom_pro_db_annotation_data_manager
comparison data_manager/customProDB_annotation.R @ 3:af0a098e15bd draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot/data_manager_customProDB commit 141369f97aa2804d2bbfd9ed620ea2a5574994c2-dirty
author | galaxyp |
---|---|
date | Wed, 27 Jan 2016 16:46:54 -0500 |
parents | 20a1b026b798 |
children | b42974daa659 |
comparison
equal
deleted
inserted
replaced
2:20a1b026b798 | 3:af0a098e15bd |
---|---|
15 | 15 |
16 | 16 |
17 option_list <- list() | 17 option_list <- list() |
18 option_list$dbkey <- make_option('--dbkey', type='character') | 18 option_list$dbkey <- make_option('--dbkey', type='character') |
19 option_list$outputFile <- make_option('--outputFile', type='character') | 19 option_list$outputFile <- make_option('--outputFile', type='character') |
| | 20 option_list$dbkey_description <- make_option('--dbkey_description', type='character') |
20 | 21 |
21 opt <- parse_args(OptionParser(option_list=option_list)) | 22 opt <- parse_args(OptionParser(option_list=option_list)) |
22 | 23 |
23 | 24 |
24 customProDB_annotation <- function( | 25 customProDB_annotation <- function( |
25 dbkey = GalaxyCharacterParam(required=TRUE), | 26 dbkey = GalaxyCharacterParam(required=TRUE), |
| | 27 dbkey_description = GalaxyCharacterParam(required=FALSE), |
26 outputFile = GalaxyOutput("output","json")) | 28 outputFile = GalaxyOutput("output","json")) |
27 { | 29 { |
28 if (!file.exists(outputFile)) | 30 if (!file.exists(outputFile)) |
29 { | 31 { |
30 gstop("json params file does not exist") | 32 gstop("json params file does not exist") |
31 } | 33 } |
32 | 34 |
| | 35 if (length(dbkey_description) < 1) |
| | 36 { |
| | 37 dbkey_description = dbkey |
| | 38 } |
| | 39 |
33 suppressPackageStartupMessages(library(rjson)) | 40 suppressPackageStartupMessages(library(rjson)) |
34 params = fromJSON(file=outputFile) | 41 params = fromJSON(file=outputFile) |
35 target_directory = params$output_data[[1]]$extra_files_path | 42 target_directory = params$output_data[[1]]$extra_files_path |
36 data_description = params$param_dict$advanced$data_description | |
37 data_id = params$param_dict$advanced$data_id | |
38 dir.create(target_directory) | 43 dir.create(target_directory) |
39 | 44 |
40 tryCatch( | 45 tryCatch( |
41 { | 46 { |
42 #file.remove(outputFile) | 47 file.remove(outputFile) |
43 }, error=function(err) | 48 }, error=function(err) |
44 { | 49 { |
45 gstop("failed to remove json params file after reading") | 50 gstop("failed to remove json params file after reading") |
46 }) | 51 }) |
47 | 52 |
48 ucscTableCodingFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome", sep="") | 53 ucscTableCodingFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgSeq.cdsExon=on&hgSeq.granularity=gene&hgSeq.casing=exon&hgSeq.repMasking=lower&hgta_doGenomicDna=get+sequence&hgta_group=genes&hgta_track=refGene&hgta_table=refGene&hgta_regionType=genome", sep="") |
49 ucscTableProteinFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit&hgta_track=refGene&hgta_table=refGene", sep="") | 54 ucscTableProteinFastaURL = paste("http://genome.ucsc.edu/cgi-bin/hgTables?db=", dbkey, "&hgta_geneSeqType=protein&hgta_doGenePredSequence=submit&hgta_track=refGene&hgta_table=refGene", sep="") |
50 codingFastaFilepath = paste(target_directory, "/", dbkey, ".cds.fa", sep="") | 55 codingFastaFilepath = paste(target_directory, "/", dbkey, ".cds.fa", sep="") |
51 proteinFastaFilepath = paste(target_directory, "/", dbkey, ".protein.fa", sep="") | 56 proteinFastaFilepath = paste(target_directory, "/", dbkey, ".protein.fa", sep="") |
| | 57 |
52 suppressPackageStartupMessages(library(customProDB)) | 58 suppressPackageStartupMessages(library(customProDB)) |
53 options(timeout=3600) | 59 options(timeout=3600) |
| | 60 |
54 cat(paste("Downloading coding FASTA from:", ucscTableCodingFastaURL, "\n")) | 61 cat(paste("Downloading coding FASTA from:", ucscTableCodingFastaURL, "\n")) |
55 download.file(ucscTableCodingFastaURL, codingFastaFilepath, quiet=T, mode='wb') | 62 download.file(ucscTableCodingFastaURL, codingFastaFilepath, quiet=T, mode='wb') |
| | 63 |
56 cat(paste("Downloading protein FASTA from:", ucscTableProteinFastaURL, "\n")) | 64 cat(paste("Downloading protein FASTA from:", ucscTableProteinFastaURL, "\n")) |
57 download.file(ucscTableProteinFastaURL, proteinFastaFilepath, quiet=T, mode='wb') | 65 download.file(ucscTableProteinFastaURL, proteinFastaFilepath, quiet=T, mode='wb') |
| | 66 |
| | 67 cat(paste("Preparing Refseq annotation files\n")) |
58 customProDB::PrepareAnnotationRefseq(genome=dbkey, CDSfasta=codingFastaFilepath, pepfasta=proteinFastaFilepath, annotation_path=target_directory) | 68 customProDB::PrepareAnnotationRefseq(genome=dbkey, CDSfasta=codingFastaFilepath, pepfasta=proteinFastaFilepath, annotation_path=target_directory) |
59 | 69 |
60 outputPath = paste("customProDB/", dbkey, sep="") | 70 outputPath = paste(dbkey, "/customProDB", sep="") |
61 output = list(data_tables = list()) | 71 output = list(data_tables = list()) |
62 output[["data_tables"]][["customProDB"]]=c(path=outputPath, name=dbkey, value=dbkey, dbkey=dbkey) | 72 output[["data_tables"]][["customProDB"]]=c(path=outputPath, name=dbkey_description, dbkey=dbkey) |
63 write(toJSON(output), file=outputFile) | 73 write(toJSON(output), file=outputFile) |
64 } | 74 } |
65 | 75 |
66 | 76 |
67 params <- list() | 77 params <- list() |