Mercurial > repos > mvdbeek > r_goseq_1_22_0
comparison get_length_and_gc_content.r @ 6:0e9424413ab0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
author | mvdbeek |
---|---|
date | Thu, 03 Mar 2016 09:56:51 -0500 |
parents | a6427f7893b0 |
children |
comparison
equal
deleted
inserted
replaced
5:b79c65c90744 | 6:0e9424413ab0 |
---|---|
1 # originally by Devon Ryan, https://www.biostars.org/p/84467/ | 1 # originally by Devon Ryan, https://www.biostars.org/p/84467/ |
2 sink(stdout(), type = "message") | |
3 | 2 |
4 library(GenomicRanges) | 3 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) |
5 library(rtracklayer) | 4 |
6 library(Rsamtools) | 5 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
7 library(optparse) | 6 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
8 library(data.table) | 7 |
8 suppressPackageStartupMessages({ | |
9 library("GenomicRanges") | |
10 library("rtracklayer") | |
11 library("Rsamtools") | |
12 library("optparse") | |
13 library("data.table") | |
14 }) | |
9 | 15 |
10 option_list <- list( | 16 option_list <- list( |
11 make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."), | 17 make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."), |
12 make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."), | 18 make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."), |
13 make_option(c("-o","--output"), type="character", default=FALSE, help="Output file with gene name, length and GC content.") | 19 make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with gene name and length."), |
20 make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with gene name and GC content.") | |
14 ) | 21 ) |
15 | 22 |
16 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 23 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) |
17 args = parse_args(parser) | 24 args = parse_args(parser) |
18 | 25 |
19 GTFfile = args$gtf | 26 GTFfile = args$gtf |
20 FASTAfile = args$fasta | 27 FASTAfile = args$fasta |
21 output_file = args$output | 28 length = args$length |
29 gc_content = args$gc_content | |
22 | 30 |
23 #Load the annotation and reduce it | 31 #Load the annotation and reduce it |
24 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") | 32 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") |
25 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) | 33 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) |
26 reducedGTF <- unlist(grl, use.names=T) | 34 reducedGTF <- unlist(grl, use.names=T) |
39 nGCs = sum(elementMetadata(x)$nGCs) | 47 nGCs = sum(elementMetadata(x)$nGCs) |
40 width = sum(elementMetadata(x)$widths) | 48 width = sum(elementMetadata(x)$widths) |
41 c(width, nGCs/width) | 49 c(width, nGCs/width) |
42 } | 50 } |
43 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) | 51 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) |
44 output <- setDT(data.frame(output), keep.rownames = TRUE)[] | 52 output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[]) |
45 colnames(output) <- c("#gene_id", "length", "GC") | |
46 | 53 |
47 write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t") | 54 |
55 write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") | |
56 write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") | |
57 | |
48 | 58 |
49 sessionInfo() | 59 sessionInfo() |