Mercurial > repos > mvdbeek > r_goseq_1_22_0
comparison get_length_and_gc_content.r @ 0:a6427f7893b0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author | mvdbeek |
---|---|
date | Fri, 26 Feb 2016 06:57:47 -0500 |
parents | |
children | 0e9424413ab0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a6427f7893b0 |
---|---|
1 # originally by Devon Ryan, https://www.biostars.org/p/84467/ | |
2 sink(stdout(), type = "message") | |
3 | |
4 library(GenomicRanges) | |
5 library(rtracklayer) | |
6 library(Rsamtools) | |
7 library(optparse) | |
8 library(data.table) | |
9 | |
10 option_list <- list( | |
11 make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."), | |
12 make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."), | |
13 make_option(c("-o","--output"), type="character", default=FALSE, help="Output file with gene name, length and GC content.") | |
14 ) | |
15 | |
16 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | |
17 args = parse_args(parser) | |
18 | |
19 GTFfile = args$gtf | |
20 FASTAfile = args$fasta | |
21 output_file = args$output | |
22 | |
23 #Load the annotation and reduce it | |
24 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") | |
25 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) | |
26 reducedGTF <- unlist(grl, use.names=T) | |
27 elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementLengths(grl)) | |
28 | |
29 #Open the fasta file | |
30 FASTA <- FaFile(FASTAfile) | |
31 open(FASTA) | |
32 | |
33 #Add the GC numbers | |
34 elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[,1] | |
35 elementMetadata(reducedGTF)$widths <- width(reducedGTF) | |
36 | |
37 #Create a list of the ensembl_id/GC/length | |
38 calc_GC_length <- function(x) { | |
39 nGCs = sum(elementMetadata(x)$nGCs) | |
40 width = sum(elementMetadata(x)$widths) | |
41 c(width, nGCs/width) | |
42 } | |
43 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) | |
44 output <- setDT(data.frame(output), keep.rownames = TRUE)[] | |
45 colnames(output) <- c("#gene_id", "length", "GC") | |
46 | |
47 write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t") | |
48 | |
49 sessionInfo() |