Mercurial > repos > petrn > repeat_annotation_pipeline
annotate summarize_gff_by_attribute.R @ 0:d14182506989 draft default tip
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
author | petrn |
---|---|
date | Tue, 15 Feb 2022 16:44:31 +0000 |
parents | |
children |
rev | line source |
---|---|
0
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
1 #!/usr/bin/env Rscript |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
2 suppressPackageStartupMessages(library(rtracklayer)) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
3 g = import(commandArgs(T)[1], format = "GFF") |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
4 attribute_name = commandArgs(T)[2] |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
5 |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
6 m = mcols(g) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
7 AN = commandArgs(T)[2] |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
8 ## check if attribute is in gff: |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
9 if (!AN %in% colnames(m)){ |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
10 stop(paste0("attribute ", AN," not in input gff\n", "Available attributes are:\n", paste(colnames(m), collapse = "\n "))) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
11 } |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
12 |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
13 w = width(g) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
14 total_lengths = by(w, INDICES=m[,attribute_name] , sum) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
15 total_counts = by(w, INDICES=m[,attribute_name] , length) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
16 total_length_summary = do.call(rbind,as.list(by(w, INDICES=m[,attribute_name] , summary))) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
17 colnames(total_length_summary) = paste0("length.", colnames(total_length_summary)) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
18 |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
19 d = data.frame(attribute = names(total_counts), cbind(count = total_counts, total_length=total_lengths, length=total_length_summary)) |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
20 colnames(d)[1] = attribute_name |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
21 d = d[order(d$total_length, decreasing = TRUE),] |
d14182506989
"planemo upload commit d7966a292ed4209f4058e77ab8c0e49a67847b16-dirty"
petrn
parents:
diff
changeset
|
22 write.table(d, sep = "\t", row.names = FALSE, quote = FALSE, file = commandArgs(T)[3]) |