Mercurial > repos > mvdbeek > r_goseq_1_22_0
annotate get_length_and_gc_content.r @ 6:0e9424413ab0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
author | mvdbeek |
---|---|
date | Thu, 03 Mar 2016 09:56:51 -0500 |
parents | a6427f7893b0 |
children |
rev | line source |
---|---|
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
1 # originally by Devon Ryan, https://www.biostars.org/p/84467/ |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
2 |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
3 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
4 |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
5 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
6 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
7 |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
8 suppressPackageStartupMessages({ |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
9 library("GenomicRanges") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
10 library("rtracklayer") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
11 library("Rsamtools") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
12 library("optparse") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
13 library("data.table") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
14 }) |
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
15 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
16 option_list <- list( |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
17 make_option(c("-g","--gtf"), type="character", help="Input GTF file with gene / exon information."), |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
18 make_option(c("-f","--fasta"), type="character", default=FALSE, help="Fasta file that corresponds to the supplied GTF."), |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
19 make_option(c("-l","--length"), type="character", default=FALSE, help="Output file with gene name and length."), |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
20 make_option(c("-gc","--gc_content"), type="character", default=FALSE, help="Output file with gene name and GC content.") |
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
21 ) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
22 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
23 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
24 args = parse_args(parser) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
25 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
26 GTFfile = args$gtf |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
27 FASTAfile = args$fasta |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
28 length = args$length |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
29 gc_content = args$gc_content |
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
30 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
31 #Load the annotation and reduce it |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
32 GTF <- import.gff(GTFfile, format="gtf", genome=NA, feature.type="exon") |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
33 grl <- reduce(split(GTF, elementMetadata(GTF)$gene_id)) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
34 reducedGTF <- unlist(grl, use.names=T) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
35 elementMetadata(reducedGTF)$gene_id <- rep(names(grl), elementLengths(grl)) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
36 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
37 #Open the fasta file |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
38 FASTA <- FaFile(FASTAfile) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
39 open(FASTA) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
40 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
41 #Add the GC numbers |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
42 elementMetadata(reducedGTF)$nGCs <- letterFrequency(getSeq(FASTA, reducedGTF), "GC")[,1] |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
43 elementMetadata(reducedGTF)$widths <- width(reducedGTF) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
44 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
45 #Create a list of the ensembl_id/GC/length |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
46 calc_GC_length <- function(x) { |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
47 nGCs = sum(elementMetadata(x)$nGCs) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
48 width = sum(elementMetadata(x)$widths) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
49 c(width, nGCs/width) |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
50 } |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
51 output <- t(sapply(split(reducedGTF, elementMetadata(reducedGTF)$gene_id), calc_GC_length)) |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
52 output <- data.frame(setDT(data.frame(output), keep.rownames = TRUE)[]) |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
53 |
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
54 |
6
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
55 write.table(output[,c(1,2)], file=length, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
56 write.table(output[,c(1,3)], file=gc_content, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t") |
0e9424413ab0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit f95b47ed1a09ce14d3b565e8ea56d8bf12c35814-dirty
mvdbeek
parents:
0
diff
changeset
|
57 |
0
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
58 |
a6427f7893b0
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
mvdbeek
parents:
diff
changeset
|
59 sessionInfo() |