comparison get_length_and_gc_content.r @ 6:0e9424413ab0 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
author mvdbeek
date Thu, 03 Mar 2016 09:56:51 -0500
parents a6427f7893b0
children
comparison
equal deleted inserted replaced
5:b79c65c90744 6:0e9424413ab0
1 # originally by Devon Ryan, https://www.biostars.org/p/84467/ 1 # originally by Devon Ryan, https://www.biostars.org/p/84467/
2 sink(stdout(), type = "message")
3 2
4 library(GenomicRanges) 3 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
5 library(rtracklayer) 4
6 library(Rsamtools) 5 # we need that to not crash galaxy with an UTF8 error on German LC settings.
7 library(optparse) 6 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8 library(data.table) 7
8 suppressPackageStartupMessages({
9 library("GenomicRanges")
10 library("rtracklayer")
11 library("Rsamtools")
12 library("optparse")
13 library("data.table")
14 })
9 15
10 option_list <- list( 16 option_list <- list(
11 make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."), 17 make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."),
12 make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."), 18 make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."),
13 make_option(c("-o","--output"), type="character", default=FALSE, help="Output file with gene name, length and GC content.") 19 make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with gene name and length."),
20 make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with gene name and GC content.")
14 ) 21 )
15 22
16 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) 23 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list)
17 args = parse_args(parser) 24 args = parse_args(parser)
18 25
19 GTFfile = args$gtf 26 GTFfile = args$gtf
20 FASTAfile = args$fasta 27 FASTAfile = args$fasta
21 output_file = args$output 28 length = args$length
29 gc_content = args$gc_content
22 30
23 #Load the annotation and reduce it 31 #Load the annotation and reduce it
24 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") 32 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon")
25 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) 33 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id))
26 reducedGTF <- unlist(grl, use.names=T) 34 reducedGTF <- unlist(grl, use.names=T)
39 nGCs = sum(elementMetadata(x)$nGCs) 47 nGCs = sum(elementMetadata(x)$nGCs)
40 width = sum(elementMetadata(x)$widths) 48 width = sum(elementMetadata(x)$widths)
41 c(width, nGCs/width) 49 c(width, nGCs/width)
42 } 50 }
43 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) 51 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length))
44 output <- setDT(data.frame(output), keep.rownames = TRUE)[] 52 output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[])
45 colnames(output) <- c("#gene_id", "length", "GC")
46 53
47 write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t") 54
55 write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
56 write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
57
48 58
49 sessionInfo() 59 sessionInfo()