annotate lib/create_annotation.R @ 6:2925751ed586 draft

Uploaded
author petrn
date Fri, 20 Dec 2019 12:59:39 +0000
parents f6ebec6e235e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1 #!/usr/bin/env Rscript
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
2 Sys.setlocale("LC_CTYPE", "en_US.UTF-8") # this is necessary for handling unicode characters (data.tree package)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
3 suppressPackageStartupMessages(library(data.tree))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
4 suppressPackageStartupMessages(library(stringr))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
5 suppressPackageStartupMessages(library(R2HTML))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
6 suppressPackageStartupMessages(library(hwriter))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
7 suppressPackageStartupMessages(library(DT))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
8 suppressPackageStartupMessages(library(tools))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
9 suppressPackageStartupMessages(library(scales))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
10 suppressPackageStartupMessages(library(igraph))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
11 suppressPackageStartupMessages(library(plotrix))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
12 suppressPackageStartupMessages(library(png))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
13
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
14 source("htmlheader.R")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
15 source("config.R") # load tandem ranks info
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
16 source("utils.R")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
17 DT_OPTIONS = options = list(pageLength = 1000, lengthMenu = c(10,50,100,1000,5000,10000))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
18 WD = getwd() # to get script directory when run from Rserve
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
19 HTMLHEADER = htmlheader ## header (character) loaded from htmlheader.R
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
20 htmlheader = gsub("Superclusters summary","TAREAN summary", htmlheader)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
21
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
22 evaluate_LTR_detection = function(f){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
23 NO_LTR=NULL
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
24 if (length(readLines(f)) == 11 ){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
25 return(NO_LTR)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
26 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
27 df=read.table(f,as.is=TRUE,sep="\t", skip=11,fill=TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
28 if (ncol(df) != 23){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
29 #df is smaller if no pbs is detected!
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
30 return(NO_LTR)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
31 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
32 df=df[!df$V13 == "",,drop=FALSE]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
33 if (nrow(df)==0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
34 return(NO_LTR)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
35 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
36 # criteria:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
37 df_part=df[df$V15 >=12 & df$V20 == 23 & df$V21<df$V20,,drop=FALSE]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
38 if (nrow(df_part) == 0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
39 return(NO_LTR)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
40 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
41 PBS_type = gsub("_","",(str_extract_all(df_part$V13, pattern="_([A-Z][a-z]{2})", simplify=TRUE))) %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
42 paste(collapse=" ")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
43 return(PBS_type)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
44 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
45
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
46
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
47
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
48 ## annotate superclusters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
49 select_reads_id = function(index, search_type = c("cluster","supercluster")) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
50 ## select read if base on the supecluster index need database connection
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
51 ## HITSORTDB!
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
52 search_type = match.arg(search_type)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
53 x = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
54 paste0("SELECT vertexname FROM vertices WHERE vertexindex IN ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
55 "(SELECT vertexindex FROM communities ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
56 "WHERE ", search_type,"=\"", index,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
57 "\")"))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
58 return(x$vertexname)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
59 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
60
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
61
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
62 get_reads_annotation = function(reads_id) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
63 ## select annotation from tables in SEQDB which has name in format *_database
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
64 annot_types = grep("_database", dbListTables(SEQDB), value = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
65 annot = list()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
66 for (i in annot_types) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
67 query = paste0("SELECT * FROM ", i, " WHERE name IN (", paste0("\"", reads_id,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
68 "\"", collapse = ", "), ")")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
69 annot[[i]] = dbGetQuery(SEQDB, query)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
70 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
71 return(annot)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
72 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
73
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
74 supercluster_size = function(supercluster) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
75 x = dbGetQuery(HITSORTDB, paste0("SELECT count(*) FROM vertices WHERE vertexindex IN ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
76 "(SELECT vertexindex FROM communities ", "WHERE supercluster=\"", supercluster,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
77 "\")"))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
78 return(x$"count(*)")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
79 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
80
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
81
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
82 cluster_annotation = function(cluster, search_type = c("cluster", "supercluster")){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
83 ## searcheither for cluster or supercluster annotation
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
84 ## read annotation from sqlite databases database is access though SEQDB (sequence
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
85 ## annotation) and HITSORTDB - clustering information
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
86 search_type = match.arg(search_type)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
87 reads_id = select_reads_id(cluster, search_type)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
88 annot = get_reads_annotation(reads_id)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
89 return(annot)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
90 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
91
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
92 get_tarean_info = function(cluster, search_type = c("cluster", "supercluster")){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
93 search_type = match.arg(search_type)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
94 if (search_type == "cluster") {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
95 search_type = "[index]"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
96 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
97 tarean_info = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
98 paste0(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
99 "SELECT [index], supercluster, satellite_probability, tandem_rank, size_real, satellite FROM cluster_info WHERE ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
100 search_type, " = ", cluster))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
101 nhits = sum(tarean_info$size_real[tarean_info$tandem_rank %in% 1:2])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
102 proportion = nhits/sum(tarean_info$size_real)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
103 return( list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
104 nhits = nhits,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
105 proportion = proportion,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
106 tarean_info = tarean_info)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
107 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
108
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
109 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
110
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
111 get_ltr_info = function(supercluster){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
112 ltr_info = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
113 paste0(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
114 "SELECT [index], supercluster, ltr_detection, size_real FROM cluster_info WHERE ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
115 "supercluster", " = ", supercluster))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
116 return(ltr_info)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
117 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
118
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
119 summarize_annotation = function(annot, size = NULL){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
120 db_id = do.call(rbind, annot)$db_id
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
121 cl_string = gsub("^.+/","",gsub(":.+$", "", gsub("^.+#", "", db_id)))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
122 weight = do.call(rbind, annot)$bitscore
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
123 domain = str_replace(str_extract(str_replace(db_id, "^.+#", ""), ":.+"), ":",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
124 "")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
125 domain[is.na(domain)] = ""
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
126 domain_table = table(cl_string, domain)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
127 dt = as.data.frame(domain_table)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
128 ## remove no count
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
129 dt = dt[dt$Freq != 0, ,drop = FALSE]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
130 rownames(dt) = paste(dt$cl_string,dt$domain)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
131 if (!is.null(size)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
132 dt$proportion = dt$Freq / size
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
133
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
134 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
135 return(dt)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
136 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
137
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
138 annot2colors = function(annot, ids, base_color = "#00000050"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
139 annot_table = do.call(rbind, annot)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
140 domains = str_replace(str_extract(str_replace(annot_table$db_id, "^.+#", ""), ":.+"), ":","")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
141 other_annot = str_replace(annot_table$db_id, "^.+#", "")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
142 complete_annot = ifelse(is.na(domains), other_annot, domains)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
143 names(complete_annot) = annot_table$name
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
144 unique_complete_annot = names (table(complete_annot) %>% sort(decreasing = TRUE))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
145 color_key = unique_colors[seq_along(unique_complete_annot)]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
146 names(color_key) = unique_complete_annot
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
147 color_table = rep(base_color, length(ids))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
148 names(color_table) = ids
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
149 color_table[names(complete_annot)] = color_key[complete_annot]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
150 return(list( color_table = color_table, legend = color_key))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
151 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
152
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
153
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
154 read_annotation_to_tree = function(supercluster, TREE_TEMPLATE = CLS_TREE) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
155 ## CLS_TREE is datatree containing 'taxonomy' information of repeats, attribute of
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
156 ## each node/ leave if filled up from annotation annotation is added to leaves
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
157 ## only!! Inner node are NA or NULL
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
158 annot0 = cluster_annotation(supercluster, search_type = "supercluster")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
159 ## keep only built in annotation
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
160 annot = list(protein_databse = annot0$protein_database, dna_database = annot0$dna_database)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
161 annot$dna_database$bitscore = annot$dna_database$bitscore / 2 #DNA bitscore corection, blastx - more weight
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
162 annot = do.call(rbind, annot)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
163 ## remove duplicated hits, keep best
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
164 annot_clean = annot[order(annot$bitscore, decreasing = TRUE), ]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
165 annot_clean = annot_clean[!duplicated(annot_clean$name), ]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
166 tarean_info = get_tarean_info(supercluster, search_type = "supercluster")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
167 ltr_info = get_ltr_info(supercluster)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
168 db_id = annot_clean$db_id
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
169 cl_string = gsub(":.+$", "", gsub("^.+#", "", db_id))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
170 weight = annot_clean$bitscore
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
171 domain = str_replace(str_extract(str_replace(db_id, "^.+#", ""), ":.+"), ":",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
172 "")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
173 domain[is.na(domain)] = "NA"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
174 mean_weight_table = by(weight, INDICES = list(cl_string, domain), FUN = function(x) signif(mean(x)))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
175 mean_weight_table[is.na(mean_weight_table)] = 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
176 total_weight_table = by(weight, INDICES = list(cl_string, domain), FUN = sum)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
177 total_weight_table[is.na(total_weight_table)] = 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
178 cl_table = table(cl_string)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
179 domain_table = table(cl_string, domain)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
180 cls_tree = Clone(TREE_TEMPLATE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
181 cls_tree$size = supercluster_size(supercluster)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
182 cls_tree$ltr_info = ltr_info
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
183 for (i in cls_tree$leaves) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
184 if (i$full_name %in% names(cl_table)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
185 i$nhits = cl_table[[i$full_name]]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
186 i$domains = domain_table[i$full_name, ]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
187 names(i$domains) = colnames(domain_table)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
188 i$mean_weight = mean_weight_table[i$full_name, ]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
189 i$total_weight = total_weight_table[i$full_name, ]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
190 i$proportion = signif(i$nhits/cls_tree$size, 3)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
191 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
192 if (i$name == "satellite"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
193 i$nhits = tarean_info$nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
194 i$tandem_rank = tarean_info$tarean_info$tandem_rank
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
195 i$proportion = tarean_info$proportion
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
196 i$tarean_info = tarean_info$tarean_info
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
197 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
198 i$proportion = 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
199 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
200 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
201 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
202 ## create domain string for printing:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
203 for (i in Traverse(cls_tree)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
204 if (is.null(i$domains)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
205 i$features = ""
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
206 } else if (sum(i$domains) == 0) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
207 i$features = ""
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
208 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
209 dom = i$domains[!names(i$domains) == "NA"]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
210 i$features = gsub(" *\\(\\)", "", pasteDomains(dom))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
211 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
212 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
213 ## add LTR info:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
214 if (any(!cls_tree$ltr_info$ltr_detection %in% "None")){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
215 tr = FindNode(cls_tree, "LTR")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
216 tr$features = "LTR/PBS"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
217 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
218 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
219 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
220
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
221 pasteDomains = function(dom){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
222 d = dom[dom != 0]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
223 if (length(d) == 0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
224 return("")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
225 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
226 paste0(d, " (", names(d), ")", sep = ", ", collapse="")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
227 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
228
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
229 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
230
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
231 rescale = function(x, newrange) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
232 # taken from plotrix package
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
233 if (missing(x) | missing(newrange)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
234 usage.string <- paste("Usage: rescale(x,newrange)\n", "\twhere x is a numeric object and newrange is the new min and max\n",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
235 sep = "", collapse = "")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
236 stop(usage.string)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
237 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
238 if (is.numeric(x) && is.numeric(newrange)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
239 xna <- is.na(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
240 if (all(xna))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
241 return(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
242 if (any(xna))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
243 xrange <- range(x[!xna]) else xrange <- range(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
244 if (xrange[1] == xrange[2])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
245 return(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
246 mfac <- (newrange[2] - newrange[1])/(xrange[2] - xrange[1])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
247 return(newrange[1] + (x - xrange[1]) * mfac)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
248 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
249 warning("Only numeric objects can be rescaled")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
250 return(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
251 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
252 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
253
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
254 add_value_to_nodes = function(tr, value_names = c("nhits", "domains", "total_weight",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
255 "proportion")) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
256 ## propagate values from leaves to nodes, assume that nodes are not defined
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
257 ## (either NA or NULL) leaves coud be be either numeric of list, if list values
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
258 ## are summed up based on the names
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
259 for (vn in value_names) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
260 for (i in Traverse(tr)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
261 w = add_leaves_value(i$leaves, vn)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
262 i[[vn]] = w
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
263 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
264 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
265 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
266
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
267 add_leaves_value = function(x, name) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
268 ## check if annotation is multivalue or single value, propagates values from
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
269 ## leaves to root
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
270 n = unique(unlist(lapply(lapply(x, "[[", name), names)))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
271 if (is.null(n)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
272 ## no names given, we can sum everything to one value
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
273 total = sum(unlist(sapply(x, "[[", name)), na.rm = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
274 return(total)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
275 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
276 total = numeric(length(n))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
277 names(total) = n
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
278 xvals = lapply(x, "[[", name)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
279 N = 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
280 for (i in xvals) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
281 for (j in names(i)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
282 total[j] = total[j] + i[j]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
283 N = N + 1
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
284 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
285 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
286 return(total)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
287 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
288 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
289
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
290
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
291 trmap = function(tr, xl = 0, yb = 0, xr = 1, yt = 1, first = TRUE, vertical = TRUE) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
292 # treemap for data.tree plotting - experimental
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
293 if (first) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
294 plot(xl:xr, yb:yt, type = "n", axes = FALSE, xlab = "", ylab = "")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
295 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
296 if (tr$nhits > 0) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
297 width = 2 * (6 - tr$level)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
298 rect(xl, yb, xr, yt, col = "#00008805", lwd = width, border = "#00000080")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
299 if (tr$level != 1) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
300 text(x = (xl + xr)/2, y = (yb + yt)/2, labels = tr$name, cex = width/4)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
301 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
302 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
303 return(NULL)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
304 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
305 Nchildren = length(tr$children)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
306 cat("children:", tr$name, tr$level, "\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
307 if (Nchildren == 0) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
308 return(NULL)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
309 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
310 sizes = sapply(tr$children, "[[", "nhits")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
311 rng = c(sizes, 0) / sum(sizes)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
312 if (vertical) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
313 ## x does not change
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
314 xl2 = rep(xl, Nchildren)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
315 xr2 = rep(xr, Nchildren)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
316 yb2 = rescale(rng, c(yb, yt))[1:(Nchildren + 1)]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
317 yt2 = rescale(rng, c(yb, yt))[-1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
318 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
319 yb2 = rep(yb, Nchildren)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
320 yt2 = rep(yt, Nchildren)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
321 xr2 = rescale(rng, c(xl, xr))[1:(Nchildren + 1)]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
322 xl2 = rescale(rng, c(xl, xr))[-1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
323 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
324 for (i in 1:Nchildren) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
325 trmap(tr$children[[i]], xl2[i], yb2[i], xr2[i], yt2[i], first = FALSE, vertical = !vertical)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
326 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
327
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
328 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
329
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
330 filter_tree = function(tr) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
331 ## keep only nodes with positive nhits values
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
332 ## must be used on trees where leaves were already propagated
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
333 ## using add_values_to_nodes
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
334 tr_filtered = Clone(tr)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
335 Prune(tr_filtered, function(x) x$nhits > 0 | containLTR(x))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
336 return(tr_filtered)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
337 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
338
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
339 containLTR = function(tr){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
340 for (i in Traverse(tr)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
341 if (i$features == "LTR/PBS"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
342 return(TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
343 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
344 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
345 return(FALSE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
346 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
347
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
348 filter_tree2 = function(tr) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
349 tr_filtered = Clone(tr)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
350 Prune(tr_filtered, function(x) x$parent$nhits > 0)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
351 return(tr_filtered)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
352 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
353
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
354 ## this is for testing purposesm in futurem each node will have its function
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
355 ## named find_best_hit, it will work like decisin tree.
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
356 ## functions will be set manually based on training data
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
357 formatWidth = function(x, w){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
358 if (length (dim(x)) > 1){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
359 for (i in seq_along(x)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
360 delta = nchar(x[,i], type="bytes") - nchar(x[,i], type="char")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
361 ## delta correction is neccessary for utf-8 correct formating
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
362 x[,i] = sprintf(paste0("%-",w[i] + delta,"s"), x[,i])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
363 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
364 return(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
365 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
366 delta = nchar(x, type="bytes") - nchar(x, type="char")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
367 sprintf(paste0("%",w + delta,"s"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
368 x) %>% return
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
369 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
370 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
371
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
372
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
373 find_best_hit_repeat = function(cls_tree){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
374 ## three children:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
375 ## repeat
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
376 ## |--rDNA
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
377 ## |--satellite this need special handling, rank 1 or 2
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
378 ## |--mobile_element
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
379 ##
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
380 ## in this case we require that satellite has proportion 0.95
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
381 ## params of good hit
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
382 min_prop_of_positive = 0.7
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
383 min_prop_second_best = 0.9
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
384 min_proportion_x_nhits = 2.5 # based on 0.05 x 50
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
385 min_satellite_proportion = 0.95
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
386 if (isLeaf(cls_tree)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
387 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
388 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
389 cond1 = cls_tree$nhits * cls_tree$proportion < min_proportion_x_nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
390 if (cond1){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
391 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
392 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
393 nhits = sapply(cls_tree$children, "[[", "nhits")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
394 all_hits = cls_tree$root$nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
395 name_of_best_hit = cls_tree$children[[which.max(nhits)]]$name
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
396 best_hit_child = cls_tree$children[[which.max(nhits)]]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
397 second_max_hits = ifelse(length(nhits) == 1, 0, max(nhits[-which.max(nhits)]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
398 cond2 = max(nhits) / sum(nhits) > min_prop_of_positive
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
399 cond3 = max(nhits) / (second_max_hits + max(nhits)) > min_prop_second_best
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
400 cond_satellite = best_hit_child$proportion > min_satellite_proportion & name_of_best_hit == "satellite"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
401 cond_other = ! name_of_best_hit == "satellite"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
402 cat(cond2, cond3, cond_satellite, cond_other, "\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
403 if (cond2 & cond3 & (cond_satellite | cond_other)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
404 ## clear case satellite or other
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
405 if ("find_best_hit" %in% names(best_hit_child)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
406 ## custom rules for node is defined
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
407 best_hit = best_hit_child$find_best_hit(cls_tree$children[[which.max(nhits)]])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
408 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
409 # use generic rules
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
410 best_hit = find_best_hit(cls_tree$children[[which.max(nhits)]])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
411 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
412 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
413 ## do more specific tests for rDNA, or mobile element
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
414 ## rDNA o mobile_elements must be above threshold!
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
415 cond_sat_rank = any(cls_tree$satellite$tandem_rank == 2) & cls_tree$satellite$nhits != 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
416 cond_rdna = cls_tree$rDNA$nhits * cls_tree$rDNA$proportion > min_proportion_x_nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
417 cond_mobile = cls_tree$mobile_element$nhits * cls_tree$mobile_element$proportion > min_proportion_x_nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
418 cat(cond_sat_rank, "\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
419 cat(cond_rdna,"\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
420 cat(cond_mobile, "\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
421 if (cond_sat_rank & (cond_rdna | cond_mobile) ){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
422 cls_tree$satellite$nhits = 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
423 best_hit = find_best_hit(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
424 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
425 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
426 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
427 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
428 return(best_hit)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
429 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
430
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
431 find_best_hit = function(cls_tree){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
432 ## general params of good hit
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
433 min_prop_of_positive = 0.7
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
434 min_prop_second_best = 0.8
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
435 min_proportion_x_nhits = 2.5 # based on 0.05 x 50
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
436 if (isLeaf(cls_tree)) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
437 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
438 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
439
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
440
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
441 cond1 = cls_tree$nhits * cls_tree$proportion < min_proportion_x_nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
442
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
443 if (cond1){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
444 ## return if proportions is lower than threshold
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
445 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
446 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
447
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
448 nhits = sapply(cls_tree$children, "[[", "nhits")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
449 best_hit_child = cls_tree$children[[which.max(nhits)]]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
450 ## more special cases:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
451 second_max_hits = ifelse(length(nhits) == 1, 0, max(nhits[-which.max(nhits)]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
452 cond2 = max(nhits) / sum(nhits) > min_prop_of_positive
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
453 cond3 = max(nhits) / (second_max_hits + max(nhits)) > min_prop_second_best
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
454 if (cond2 & cond3) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
455 if ("find_best_hit" %in% names(best_hit_child)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
456 ## custom rules for node is defined
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
457 best_hit = best_hit_child$find_best_hit(cls_tree$children[[which.max(nhits)]])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
458 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
459 # use generic rules
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
460 best_hit = find_best_hit(cls_tree$children[[which.max(nhits)]])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
461 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
462 } else {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
463 return(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
464 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
465 return(best_hit)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
466 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
467
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
468
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
469 get_annotation_groups = function(tr){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
470 tr_all = Clone(tr)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
471 Prune(tr_all, pruneFun=function(...)FALSE) ## prune everithing -> keep root
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
472 tr_all$name = "Unclassified repeat (No evidence)"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
473 tr_contamination = Clone(tr)$contamination
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
474 tr_organelle = Clone(tr)$organelle
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
475 tr_repeat = Clone(tr)$"repeat"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
476 tr_repeat$name = "Unclassified_repeat (conflicting evidences)"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
477 return(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
478 list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
479 tr_repeat = tr_repeat,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
480 tr_organelle = tr_organelle,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
481 tr_all = tr_all,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
482 tr_contamination = tr_contamination
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
483 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
484 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
485 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
486
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
487
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
488
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
489 format_tree = function(cls_tree, ...) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
490 df = ToDataFrameTree(cls_tree, ...)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
491 ## try to get rid off unicode characters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
492 df[,1] = gsub("\U00B0", "'", df[,1], fixed = TRUE, useBytes = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
493 df[,1] = gsub("\U00A6", "|", df[,1], fixed = TRUE, useBytes = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
494 colnames(df)[1] = " " ## originally levelName
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
495 if ("proportion" %in% colnames(df)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
496 df$proportion = signif(df$proportion,2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
497 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
498 if ("Proportion[%]" %in% colnames(df)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
499 df$"Proportion[%]" = round(df$"Proportion[%]", 2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
500 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
501 ## format header
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
502 w1 = apply(df, 2, function(x) max(nchar(x)))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
503 w2 = nchar(colnames(df))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
504 # use whatever with is bigger for formating
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
505 w = ifelse(w1 > w2, w1, w2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
506
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
507 out = character()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
508 header = mapply(FUN = function(x, y) formatWidth(x, w = y), colnames(df), w) %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
509 paste0(collapse=" | ")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
510 hr_line = gsub(".","-",header)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
511 # create output
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
512 # classification lines
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
513 class_lines = formatWidth(df, w) %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
514 apply(1, FUN = function(x) paste0(x,collapse = " | ")) %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
515 paste(collapse = "\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
516 paste(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
517 header,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
518 hr_line,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
519 class_lines,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
520 sep="\n") %>% return
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
521 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
522
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
523
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
524 make_final_annotation_template = function(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
525 annot_attributes = list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
526 Nreads = 0,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
527 Nclusters = 0,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
528 Nsuperclusters = 0,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
529 "Proportion[%]" = 0,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
530 cluster_list = c())
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
531 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
532 {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
533 ct = Clone(CLS_TREE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
534 for (i in Traverse(ct)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
535 for (j in names(annot_attributes)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
536 i[[j]] = annot_attributes[[j]]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
537 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
538 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
539 return(ct)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
540 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
541
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
542
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
543 common_ancestor = function(tr1, tr2){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
544 a1 = tr1$Get('name', traversal = "ancestor")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
545 a2 = tr2$Get('name', traversal = "ancestor")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
546 ancestor = intersect(a1,a2)[1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
547 return(ancestor)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
548 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
549
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
550 create_all_superclusters_report = function(max_supercluster = 100,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
551 paths,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
552 libdir,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
553 superclusters_dir,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
554 seqdb, hitsortdb,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
555 classification_hierarchy_file,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
556 HTML_LINKS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
557 {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
558 ## connect to sqlite databases
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
559 ## seqdb and hitsortdb are path to sqlite files
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
560 HTML_LINKS = nested2named_list(HTML_LINKS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
561 paths = nested2named_list(paths)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
562 report_file = paths[['supercluster_report_html']]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
563 ## create SEQDB, HTISORTDB and CLS_TREE
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
564 connect_to_databases(seqdb, hitsortdb, classification_hierarchy_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
565
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
566 ## append specific classication rules
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
567 CLS_TREE$"repeat"$find_best_hit = find_best_hit_repeat
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
568
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
569 ###
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
570 cat("Superclusters summary\n", file = report_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
571 cat("---------------------\n\n", file = report_file, append = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
572 empty = rep(NA,max_supercluster)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
573 SC_table = data.frame(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
574 Supercluster = 1 : max_supercluster, "Number of reads" = empty, Automatic_annotation = empty,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
575 Similarity_hits = empty, TAREAN_annotation = empty,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
576 Clusters = empty, stringsAsFactors = FALSE, check.names = FALSE
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
577 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
578 SC_csv = SC_table # table as spreadsheet - no html formatings
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
579 final_cluster_annotation = make_final_annotation_template()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
580
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
581 for (sc in 1 : max_supercluster) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
582 cat("supercluster: ",sc,"\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
583 cls_tree = read_annotation_to_tree(sc)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
584 sc_summary = get_supercluster_summary(sc)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
585 add_value_to_nodes(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
586 cls_tree_filtered = filter_tree(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
587 best_hit = find_best_hit(cls_tree)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
588 ## exception to decision tree put here.
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
589 ## All -> class I ?
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
590 if (best_hit$name == "All"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
591 ## check LTR
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
592 if (any(!(unique(cls_tree$ltr_info$ltr_detection) %in% "None"))){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
593 ## if LTR found - move classification to Class I
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
594
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
595 LTR = FindNode(best_hit, "LTR")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
596 nhits_without_class_I = best_hit$nhits - LTR$nhits
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
597 prop_without_class_I = best_hit$proportion - LTR$proportion
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
598 cond3 = nhits_without_class_I * prop_without_class_I < 1
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
599 cond2 = best_hit$nhits * best_hit$proportion < 0.5 # other hits weak
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
600 cond1 = LTR$nhits >= (0.95 * best_hit$nhits) # hits are pre in sub class_I
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
601
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
602 if (cond1 | cond2 | cond3){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
603 best_hit = LTR
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
604 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
605 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
606 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
607 ## check is conflict os best hit with LTR/PBS exists
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
608 if (any(!(unique(cls_tree$ltr_info$ltr_detection) %in% "None"))){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
609 LTR = FindNode(cls_tree, "LTR")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
610 ca_name = common_ancestor(LTR, best_hit)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
611 if (ca_name != "LTR"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
612 ## reclassify
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
613 best_hit = FindNode(cls_tree, ca_name)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
614 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
615 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
616 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
617
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
618 best_hit_name = best_hit$name
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
619 best_hit_path = paste(best_hit$path, collapse = "/")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
620 ## add best hit do database:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
621 sqlcmd = paste0(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
622 "UPDATE cluster_info SET supercluster_best_hit = \"", best_hit_path, "\" ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
623 " WHERE supercluster = ", sc
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
624 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
625 dbExecute(HITSORTDB, sqlcmd)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
626 # add annotation annotation summary stored in final_cluster_annotation - summing up
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
627 best_hit_node = FindNode(final_cluster_annotation, best_hit_name)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
628 best_hit_node$Nsuperclusters = best_hit_node$Nsuperclusters + 1
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
629 best_hit_node$Nclusters = best_hit_node$Nclusters + sc_summary$Nclusters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
630 best_hit_node$Nreads = best_hit_node$Nreads + sc_summary$Nreads
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
631 best_hit_node$"Proportion[%]" = best_hit_node$"Proportion[%]" + sc_summary$proportion*100
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
632 best_hit_node$cluster_list = append(best_hit_node$cluster_list, sc_summary$cluster_list)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
633 best_hit_label = ifelse(best_hit_name == "All", "NA", best_hit_name)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
634 SC_csv$Automatic_annotation[sc] = best_hit_label
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
635 SC_csv$"Number of reads"[sc] = SC_table$"Number of reads"[sc] = cls_tree$size
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
636 SC_csv$Similarity_hits[sc] = format_tree(cls_tree_filtered,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
637 "nhits", "proportion", "features")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
638 SC_table$Similarity_hits[sc] = format_tree(cls_tree_filtered,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
639 "nhits", "proportion", "features") %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
640 preformatted
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
641
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
642 SC_table$Automatic_annotation[sc] = best_hit_label
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
643
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
644 G = get_supercluster_graph(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
645 sc=sc, seqdb = seqdb,hitsortdb = hitsortdb,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
646 classification_hierarchy_file = classification_hierarchy_file
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
647 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
648 ## append tarean annotation
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
649 clist = paste(V(G)$name, collapse =",")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
650 cl_rank = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
651 paste("SELECT [index], tandem_rank FROM cluster_info WHERE [index] IN (",clist,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
652 ") AND tandem_rank IN (1,2)"))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
653 ## add information about TR clusters is any
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
654 if (nrow(cl_rank) > 0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
655 tarean_annot = paste(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
656 cl_rank$index, "-",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
657 RANKS_TANDEM[cl_rank$tandem_rank]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
658 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
659 SC_csv$TAREAN_annotation[sc] = paste(tarean_annot,collapse="\n")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
660 SC_table$TAREAN_annotation[sc] = mapply(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
661 hwrite, tarean_annot,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
662 link = sprintf(HTML_LINKS$ROOT_TO_TAREAN,cl_rank$index)) %>% paste(collapse="<br>")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
663 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
664 ## add links to clusters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
665 SC_csv$Clusters[sc] = paste((V(G)$name), collapse = ",")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
666 links = mapply(hwrite, V(G)$name, link = sprintf(HTML_LINKS$ROOT_TO_CLUSTER, as.integer(V(G)$name)))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
667 SC_table$Clusters[sc] = paste(links, collapse =", ")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
668 create_single_supercluster_report(G, superclusters_dir)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
669 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
670
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
671
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
672
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
673 ## add html links
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
674 SC_table$Supercluster = mapply(hwrite, SC_table$Supercluster,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
675 link = sprintf(HTML_LINKS$ROOT_TO_SUPERCLUSTER,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
676 SC_table$Supercluster))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
677
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
678
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
679
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
680 write.table(SC_csv, file = paths[["superclusters_csv_summary"]],
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
681 sep = "\t", row.names = FALSE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
682
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
683
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
684 DT_instance = datatable(SC_table, escape = FALSE, options = DT_OPTIONS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
685 saveWidget(DT_instance, file = normalizePath(report_file),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
686 normalizePath(libdir), selfcontained = FALSE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
687 add_preamble(normalizePath(report_file),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
688 preamble='<h2>Supercluster annotation</h2> <p><a href="documentation.html#superclust"> For table legend see documentation. <a> </p>')
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
689
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
690 annot_groups = get_annotation_groups(final_cluster_annotation)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
691 saveRDS(object=annot_groups, file = paths[['repeat_annotation_summary_rds']])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
692 final_cluster_annotation_formated = paste(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
693 sapply(annot_groups, format_tree, 'Proportion[%]', 'Nsuperclusters', 'Nclusters', "Nreads"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
694 collapse = "\n\n\n"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
695 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
696 ## TODO add singleton counts, total counts and extra text - csv output
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
697 ## export annotation summary
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
698 html_summary = start_html(paths[['summarized_annotation_html']], gsub("PAGE_TITLE", "Repeat Annotation Summary", HTMLHEADER))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
699 html_summary("Repeat annotation summary", HTML.title, HR=2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
700 html_summary("This table summarizes the automatic annotations of superclusters that should be verified and manually corrected if necessary. Thus, the table should not be used as the final output of the analysis without critical evaluation.", HTML.title, HR=3)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
701 html_summary(preformatted(final_cluster_annotation_formated), cat)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
702
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
703
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
704
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
705 return()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
706 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
707 ## for testing
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
708 if (FALSE){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
709 create_supercluster_report(1:100, report_file, seqdb, hitsortdb, class_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
710 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
711
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
712 create_single_supercluster_report = function(G, superclusters_dir){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
713 sc_dir = paste0(superclusters_dir, "/dir_SC",sprintf("%04d", G$name))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
714 htmlfile = paste0(sc_dir, "/index.html")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
715 dir.create(sc_dir)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
716 title = paste("Supercluster no. ",G$name)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
717 htmlheader = gsub("PAGE_TITLE", title, HTMLHEADER)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
718 cat(htmlheader, file = htmlfile)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
719 file.copy(paste0(WD,"/style1.css"), sc_dir)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
720 HTML.title(title, file = htmlfile)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
721 HTML("Simmilarity hits (only hits with proportion above 0.1% in at least one cluster are shown) ", file = htmlfile)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
722 img_file = paste0(sc_dir,"/SC",sprintf("%0d",G$name), ".png")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
723 png(filename = img_file, width = 1400, height=1200)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
724 coords = plot_supercluster(G = G)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
725 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
726 ## basename - make link relative
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
727 href = paste0("../../clusters/dir_CL",sprintf("%04d", as.numeric(V(G)$name)),"/index.html")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
728 html_insert_image(basename(img_file), htmlfile,coords=coords, href = href)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
729
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
730 if (is_comparative()){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
731 HTML("Comparative analysis", file = htmlfile)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
732 img_file = paste0(sc_dir,"/SC",sprintf("%0d",G$name), "comparative.png")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
733 png(filename = img_file, width = 1400, height=1200)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
734 coords = plot_supercluster(G = G, "comparative")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
735 mtext("comparative analysis")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
736 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
737 ## basename - make link relative
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
738 href = paste0("../../clusters/dir_CL",sprintf("%04d", as.numeric(V(G)$name)),"/index.html")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
739 html_insert_image(basename(img_file), htmlfile,coords=coords, href = href)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
740
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
741 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
742 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
743
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
744 html_insert_image = function(img_file, htmlfile, coords=NULL, href=NULL) {
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
745 img_tag = sprintf('<p> <img src="%s" usemap="#clustermap" > \n</p>\n<br>\n', img_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
746 cat(img_tag, file = htmlfile, append = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
747 if (!is.null(coords)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
748 formated_links = character()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
749 for (i in seq_along(href)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
750 formated_links[i] = sprintf('<area shape="circle" coords="%f,%f,%f" href="%s" >\n',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
751 coords[i,1],coords[i,2],coords[,3],href[i])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
752 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
753 cat('<map name="clustermap" >\n',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
754 formated_links,"</map>\n", file=htmlfile, append = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
755
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
756 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
757 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
758
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
759 html_insert_floating_image = function(img_file,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
760 htmlfile,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
761 width=NULL,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
762 title= "",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
763 footer = ""
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
764 ){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
765 if (is.null(width)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
766 width=""
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
767 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
768 tag = paste(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
769 '<div class="floating_img">\n',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
770 ' <p>', title,'</p>\n',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
771 ' <a href=',img_file, ">",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
772 ' <img src="',img_file, '" alt="image" width="',width,'">\n',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
773 ' </a>',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
774 ' <p> ',footer,'</p>\n',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
775 '</div>\n'
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
776 ,sep = "")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
777 cat(tag, file = htmlfile, append = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
778 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
779
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
780 get_supercluster_graph = function(sc, seqdb, hitsortdb, classification_hierarchy_file){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
781 ## sc - superclusted index
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
782 ## seqdb, hitsortdb - path to sqlite databases
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
783 ## classificationn_hierarchy_file - path data.tree rds file
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
784 ## SIZE of image
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
785 SIZE = 1000
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
786 connect_to_databases(seqdb, hitsortdb, classification_hierarchy_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
787 clusters = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
788 paste0("SELECT cluster, size FROM superclusters WHERE supercluster=",sc)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
789 ) %>% as.data.frame
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
790 ## string for sql query:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
791 cluster_list = paste0( "(", paste0(clusters$cluster, collapse = ","),")")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
792 supercluster_ncol = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
793 paste("SELECT c1,c2,w, k FROM cluster_mate_bond where c1 IN ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
794 cluster_list, " AND c2 IN ", cluster_list, "AND k > 0.1"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
795 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
796 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
797 if (nrow(supercluster_ncol)>0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
798 ## at least two clusters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
799 G = graph.data.frame(supercluster_ncol, directed=FALSE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
800 L = layout_with_kk(G)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
801 ## layout is rescaled for easier linking html
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
802 L[,1] = scales::rescale(L[,1], to = c(1,SIZE) )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
803 L[,2] = scales::rescale(L[,2], to = c(1,SIZE) )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
804 G$L = L
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
805 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
806 ## only one cluster in supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
807 G = graph.full(n = 1)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
808 V(G)$name = as.character(clusters$cluster)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
809 G$L = matrix(c(SIZE/2, SIZE/2), ncol=2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
810 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
811 G = set_vertex_attr(G, "label", value = paste0("CL",names(V(G))))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
812 G$name=sc ## names is supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
813 # create annotation of individual clusters, will be attached as node attribudes
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
814 annot = get_cluster_annotation_summary(clusters)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
815 ## annotation of nodes(clusters)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
816 G = set_vertex_attr(G, "annotation", value = annot$clusters[names(V(G))])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
817 G = set_vertex_attr(G, "size",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
818 value = clusters$size[match(names(V(G)), as.character(clusters$cluster))])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
819 G$annotation = annot$supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
820 G$clusters = clusters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
821 # TODO if comparative analysis - add proportion of species
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
822 if (is_comparative()){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
823 comparative_counts = get_cluster_comparative_counts(cluster_list)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
824 NACluster = comparative_counts$supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
825 # some clusters do not have comparative data - they are bellow threshold!
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
826 NACluster[,] = NA
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
827 counts_adjusted = comparative_counts$clusters[names(V(G))]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
828 for (i in names(V(G))){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
829 if(is.null(counts_adjusted[[i]])){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
830 ## get count from database
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
831 seqid = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
832 paste(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
833 "SELECT vertexname FROM vertex_cluster where cluster = ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
834 i)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
835 )$vertexname
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
836 codes = get_comparative_codes()$prefix
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
837 x = table(factor(substring(seqid, 1,nchar(codes[1])), levels = codes))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
838 counts_adjusted[[i]] = data.frame(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
839 Freq = as.numeric(x),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
840 proportion = as.numeric(x/sum(x)),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
841 row.names = names(x))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
842
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
843 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
844 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
845
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
846 G = set_vertex_attr(G, "comparative_counts",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
847 value = counts_adjusted[V(G)$name]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
848 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
849 # for whole supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
850 G$comparative = comparative_counts$supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
851 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
852
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
853 return(G)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
854 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
855
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
856 get_cluster_comparative_counts = function(cluster_list){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
857 counts = dbGetQuery(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
858 HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
859 paste0("SELECT * FROM comparative_counts WHERE clusterindex IN",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
860 cluster_list
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
861 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
862 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
863 comparative_counts = apply(counts, 1, function(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
864 y = data.frame(Freq = x[-1], proportion = x[-1]/sum(x[-1]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
865 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
866 names(comparative_counts) = counts$clusterindex
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
867 total_counts = data.frame(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
868 Freq = colSums(counts[,-1]),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
869 proportion = colSums(counts[,-1])/sum(counts[,-1]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
870 return(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
871 list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
872 clusters = comparative_counts,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
873 supercluster = total_counts
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
874 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
875 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
876
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
877 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
878
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
879 get_cluster_annotation_summary = function(clusters){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
880 ## clusters - table of clusters, col: cluster, size
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
881 annot_list = apply(clusters,1,FUN = function(x)summarize_annotation(cluster_annotation(x[1]),x[2]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
882 ## if empty - not annotation at all
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
883 if (sum(sapply(annot_list, nrow)) == 0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
884 return(list (clusters = NULL,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
885 superclusters = NULL
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
886 ))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
887 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
888 names(annot_list) = as.character(clusters$cluster)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
889 annot_all = do.call(rbind,annot_list)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
890 total = by(annot_all$Freq,INDICES=with(annot_all, paste(cl_string,domain)), FUN=sum, simplify=TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
891 total_df = data.frame(Freq = c(total), proportion = c(total) /sum(clusters$size))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
892 ## for ploting it need to contain all annot categories as in s upercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
893 annot_list_full = list()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
894 for (i in seq_along(annot_list)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
895 annot_list_full[[i]] = total_df
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
896 for (j in rownames(total_df)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
897 if (!j %in% rownames(annot_list[[i]])){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
898 annot_list_full[[i]][j,c('Freq','proportion')] = c(0,0)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
899 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
900 annot_list_full[[i]][j,c('Freq','proportion')] = annot_list[[i]][j,c('Freq','proportion')]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
901 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
902 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
903 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
904 names(annot_list_full) = names(annot_list)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
905 return(list (clusters = annot_list_full,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
906 supercluster = total_df
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
907 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
908 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
909 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
910
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
911 plot_supercluster = function(G,color_scheme=c("annotation","comparative")){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
912 ## create plot in coords y: 1-1200, x: 1 - 1400
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
913 ## this is fixed for href to work in exact image areas!
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
914 color_scheme = match.arg(color_scheme)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
915 LIMS0 = matrix(c (1,1000,1,1000), ncol = 2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
916 OFFSET_H = 100; OFFSET_W = 200
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
917 lims = LIMS0 + c(0,OFFSET_W * 2, 0, OFFSET_H * 2)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
918 ## use full range
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
919 par(mar=c(0,0,0,0),xaxs="i", yaxs="i")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
920 ## proportion of repeats
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
921 if (color_scheme == "annotation"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
922 prop = sapply (V(G)$annotation, FUN = function(x)x$proportion)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
923 brv = c("#BBBBBB",unique_colors)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
924 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
925 prop = sapply (V(G)$comparative_counts, FUN = function(x)x$proportion)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
926 brv = unique_colors
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
927 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
928 ## handle special cases - single cluster with annot or single annotation group
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
929 if (is.null(dim(prop))){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
930 prop=matrix(prop, nrow = 1)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
931 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
932 if (length(prop)==0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
933 ## special case - not annotation at all
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
934 prop = matrix(rep(1,vcount(G)),ncol = vcount(G), dimnames=list("NAN", NULL))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
935 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
936 if (color_scheme == "annotation"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
937 rownames(prop) = rownames(G$annotation)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
938 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
939 rownames(prop) = rownames(G$comparative)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
940 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
941 ## reorder by prop
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
942 if (color_scheme =="annotation"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
943 prop = prop[order(prop[,1], decreasing = TRUE),,drop=FALSE]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
944 NAN = 1 - colSums(prop)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
945 prop = rbind(NAN, prop)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
946 brv = c("#BBBBBB",unique_colors)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
947 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
948 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
949 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
950 L = G$L + c(OFFSET_H)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
951 ## for href - necessary convert coordinater - image is flipped vertically
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
952 include = rowSums(prop>0.001, na.rm = TRUE)>=1
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
953 include[1] = TRUE
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
954 prop = prop[include, , drop=FALSE]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
955 plot(0,0,type='n', , xlim = lims[,1], ylim = lims[,2])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
956 plot_edges(G, L, lwd = 8)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
957 RADIUS = radius_size(G)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
958 coords = cbind(G$L[,1,drop=FALSE] + 100, 1100 - G$L[,2,drop=FALSE], RADIUS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
959 pieScatter(L[,1], L[,2], t(prop), col = brv,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
960 radius = RADIUS, new.plot = FALSE, border=NA)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
961 ## add ledend
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
962 legend("topright", legend = rownames(prop),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
963 col = brv, pch = 15, cex= 1.5, pt.cex= 3)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
964 ## add cluster labels
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
965 text(x = L[,1], y = L[,2], labels = paste(V(G)$label,"\n",V(G)$size))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
966 return(coords)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
967 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
968
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
969
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
970 radius_size = function(G){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
971 if (vcount(G) == 1) RADIUS=120
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
972 if (vcount(G) == 2) RADIUS=50
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
973 if (vcount(G) %in% 3:8) RADIUS=40
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
974 if (vcount(G) > 8) RADIUS=30
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
975 return(RADIUS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
976 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
977
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
978
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
979 plot_edges = function(G,L,col="#33000040", lwd = 1){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
980 e = get.edgelist(G, names = F)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
981 X0 = L[e[, 1], 1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
982 Y0 = L[e[, 1], 2]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
983 X1 = L[e[, 2], 1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
984 Y1 = L[e[, 2], 2]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
985 segments(X0, Y0, X1, Y1, lwd = lwd,col = col)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
986 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
987
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
988
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
989 pieScatter=function(x, y, prop,radius=1, col=NULL, edges=100, new.plot = TRUE, ...){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
990 if (new.plot){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
991 plot(x,y,type='n')
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
992 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
993 for (i in seq_along(x)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
994 p=prop[i,]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
995 if (length(radius)==1){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
996 r=radius
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
997 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
998 r=radius[i]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
999 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1000 include = p != 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1001 floating.pie(x[i], y[i],p[include],
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1002 col=col[include], radius=r,edges=50,...)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1003 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1004 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1005
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1006 unique_colors = c("#00FF00", "#0000FF", "#FF0000",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1007 "#FFA6FE", "#FFDB66", "#006401", "#010067", "#95003A",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1008 "#007DB5", "#FF00F6", "#FFEEE8", "#774D00", "#90FB92", "#01FFFE",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1009 "#0076FF", "#D5FF00", "#FF937E", "#6A826C", "#FF029D",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1010 "#FE8900", "#7A4782", "#7E2DD2", "#85A900", "#FF0056",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1011 "#A42400", "#00AE7E", "#683D3B", "#BDC6FF", "#263400",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1012 "#BDD393", "#00B917", "#9E008E", "#001544", "#C28C9F",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1013 "#FF74A3", "#01D0FF", "#004754", "#E56FFE", "#788231",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1014 "#0E4CA1", "#91D0CB", "#BE9970", "#968AE8", "#BB8800",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1015 "#43002C", "#DEFF74", "#00FFC6", "#FFE502", "#620E00",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1016 "#008F9C", "#98FF52", "#7544B1", "#B500FF", "#00FF78",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1017 "#FF6E41", "#005F39", "#6B6882", "#5FAD4E", "#A75740",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1018 "#A5FFD2", "#FFB167", "#009BFF", "#E85EBE")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1019
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1020 if (FALSE){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1021 ## For testing
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1022 for ( i in 1:100){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1023 G = get_supercluster_graph(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1024 sc=i, seqdb = seqdb,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1025 hitsortdb = hitsortdb, classification_hierarchy_file = class_file
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1026 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1027 png(paste0("/mnt/raid/users/petr/tmp/test.figure_",i,".png"), width = 1400, height=1000)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1028 plot_supercluster(G)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1029 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1030 print(i)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1031 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1032
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1033 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1034
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1035 plotg = function(GG,LL,wlim=NULL,...){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1036 ## quick ploting function
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1037 e = get.edgelist(GG, names=F)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1038 w = E(GG)$weight
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1039 if (!is.null(wlim)) {e = e[w > wlim,]; w = w[w > wlim]}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1040 X0 = LL[e[,1],1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1041 Y0 = LL[e[,1],2]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1042 X1 = LL[e[,2],1]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1043 Y1 = LL[e[,2],2]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1044 plot(range(LL[,1]),range(LL[,2]),xlab="",ylab="",axes=FALSE,type="n",...)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1045 brv = 'grey'
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1046 segments(X0,Y0,X1,Y1,lwd=.5,col=brv)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1047 points(LL,pch=18,cex=.8,...)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1048 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1049
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1050
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1051
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1052
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1053 get_cluster_info = function(index){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1054 ## must be connected to hitsort database ( HITSORT)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1055 x = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1056 paste0("SELECT * FROM cluster_info WHERE [index]=",index))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1057 return(as.list(x))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1058 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1059
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1060 get_supercluster_info = function(sc){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1061 ## must be connected to hitsort database ( HITSORT)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1062 x = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1063 paste0("SELECT * FROM cluster_info WHERE supercluster=",sc))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1064 return(x)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1065 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1066
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1067 get_supercluster_summary = function(sc){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1068 info = get_supercluster_info(sc)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1069 Nclusters = nrow(info)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1070 Nreads = supercluster_size(sc)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1071 N = dbGetQuery(SEQDB, "SELECT count(*) from sequences")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1072 proportion = Nreads/N
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1073 cluster_list = info$index
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1074 return(list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1075 Nreads = Nreads,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1076 Nclusters = Nclusters,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1077 proportion = proportion,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1078 cluster_list = cluster_list
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1079 ))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1080 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1081
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1082
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1083 get_cluster_connection_info = function(index, search_type=c("pair", "similarity")){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1084 search_type = match.arg(search_type)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1085 if (search_type == "pair"){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1086 x = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1087 sprintf(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1088 "SELECT * FROM cluster_mate_connections WHERE c1==%d OR c2=%d",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1089 index, index)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1090 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1091 if (nrow(x) == 0 ){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1092 ## no connections
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1093 return(NULL)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1094 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1095 other_cl = ifelse(x$c1 == index, x$c2, x$c1)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1096 out = data.frame(cl = other_cl, N = x$N)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1097 out$ids = unlist(strsplit(x$ids, split = ", "))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1098 k = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1099 sprintf(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1100 "SELECT * FROM cluster_mate_bond WHERE c1==%d OR c2=%d",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1101 index, index)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1102 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1103 if (k$c1 != x$c1 || k$c2 !=x$c2){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1104 ## tables not in same order
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1105 stop("tables cluster_mate_bond and cluster_mate_connection are not in the same order")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1106 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1107 out$k = k$k
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1108 return(out)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1109 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1110 x = dbGetQuery(HITSORTDB,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1111 sprintf(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1112 "SELECT * FROM cluster_connections WHERE c1==%d OR c2=%d",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1113 index, index)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1114 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1115 if (nrow(x) == 0 ){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1116 ## no connections
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1117 return(NULL)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1118 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1119 other_cl = ifelse(x$c1 == index, x$c2, x$c1)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1120 out = data.frame(cl = other_cl, N = x$"count(*)")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1121 return(out)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1122 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1123 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1124
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1125
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1126 format_clinfo=function(x){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1127 include = c(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1128 "size",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1129 "size_real",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1130 "ecount",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1131 "supercluster",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1132 "annotations_summary",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1133 "ltr_detection",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1134 "pair_completeness",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1135 "TR_score",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1136 "TR_monomer_length",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1137 "loop_index",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1138 "satellite_probability",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1139 "TR_consensus",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1140 "tandem_rank",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1141 "orientation_score"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1142 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1143 new_names = c(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1144 "TR_consensus" = "TAREAN consensus",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1145 "tandem_rank" = "TAREAN_annotation",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1146 "ltr_detection" = "PBS/LTR",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1147 'size' = 'number of reads in the graph',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1148 'size_real' = 'the total number of reads in the cluster',
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1149 'ecount' = "number of edges of the graph:",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1150 "annotations_summary" = "similarity based annotation"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1151 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1152 values = unlist(x[include]) %>% gsub("\n","<br>", .)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1153 include = replace(include, match(names(new_names),include), new_names)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1154 names(values) = include
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1155 ## replace tandem rank with annotation description
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1156 values["TAREAN_annotation"] = RANKS_TANDEM[values["TAREAN_annotation"]]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1157 ## create HTML table without header
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1158 desc = df2html(data.frame(names(values), values))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1159 return(desc)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1160 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1161
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1162 create_cluster_report = function(index,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1163 seqdb, hitsortdb,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1164 classification_hierarchy_file,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1165 HTML_LINKS){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1166 ## index - index of cluster, integer
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1167 ## seqdb, hitsortdb - sqlite databases
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1168 ## classification_hierarchy_file - rds file with classification
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1169 ## other information about cluster is collected from HITSORTDB
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1170 connect_to_databases(seqdb, hitsortdb, classification_hierarchy_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1171 ## basic info about cluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1172 clinfo = get_cluster_info(index)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1173 HTML_LINKS = nested2named_list(HTML_LINKS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1174 PNGWIDTH = 1000 # this must be kept so than link in png work correctly
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1175 PNGHEIGHT = 1000
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1176 LEGENDWIDTH = 300
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1177 PS = 30 ## pointsize for plotting
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1178 MAX_N = 10 ## to show mates or similarities connections
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1179
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1180 #################################################################################
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1181 ## create HTML title:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1182 title = paste("Cluster no. ",index)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1183 htmlheader = gsub("PAGE_TITLE", title, HTMLHEADER)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1184 cat(htmlheader, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1185 file.copy(paste0(WD,"/style1.css"), clinfo$dir)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1186 HTML.title(title, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1187 ## print basic info to HTML header:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1188 ## make link back to cluster table
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1189 hwrite("Go back to cluster table <br><br>\n", link=HTML_LINKS$CLUSTER_TO_CLUSTER_TABLE) %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1190 cat(file = clinfo$html_report_main, append =TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1191 ## create ling to corresponding supercluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1192 cat("Cluster is part of ",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1193 hwrite (paste("supercluster: ", clinfo$supercluster),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1194 link=sprintf(HTML_LINKS$CLUSTER_TO_SUPERCLUSTER, clinfo$supercluster)),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1195 "\n<br><br>\n",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1196 file = clinfo$html_report_main, append =TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1197 HTML.title("Cluster characteristics:", HR = 3, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1198 cat(format_clinfo(clinfo), file = clinfo$html_report_main, append =TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1199 #################################################################################
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1200 ## add link to image with graph layout with domains
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1201 fs = list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1202 graph_domains = paste0(clinfo$html_report_files,"/graph_domains.png"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1203 graph_mates = paste0(clinfo$html_report_files,"/graph_mates%04d_%04d.png"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1204 graph_base = paste0(clinfo$html_report_files,"/graph_base.png"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1205 graph_comparative = paste0(clinfo$html_report_files,"/graph_comparative.png")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1206 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1207 fs_relative = list(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1208 graph_domains = paste0(basename(clinfo$html_report_files),"/graph_domains.png"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1209 graph_comparative = paste0(basename(clinfo$html_report_files),"/graph_comparative.png"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1210 graph_mates = paste0(basename(clinfo$html_report_files),"/graph_mates%04d_%04d.png")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1211 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1212
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1213 dir.create(clinfo$html_report_files)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1214 load(clinfo$graph_file)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1215 annot = cluster_annotation(index)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1216 annot_summary = summarize_annotation(annot, clinfo$size_real)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1217 vertex_colors = annot2colors(annot,ids = V(GL$G)$name)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1218
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1219 color_proportions = sort(table(vertex_colors$color_table)) / clinfo$size_real
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1220 ## hits with smaller proportion are not shown!
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1221 exclude_colors = names(color_proportions)[color_proportions < 0.001]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1222 vertex_colors$color_table[vertex_colors$color_table %in% exclude_colors] = "#00000050"
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1223 vertex_colors$legend = vertex_colors$legend[!vertex_colors$legend %in% exclude_colors]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1224
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1225 if (is_comparative()){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1226 comp_codes = get_comparative_codes()$prefix
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1227 colors = unique_colors[seq_along(comp_codes)]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1228 names(colors) = comp_codes
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1229 species = substring(V(GL$G)$name,1, nchar(comp_codes[1]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1230 ## create image with highlighted domains
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1231 png(fs$graph_comparative, width = PNGWIDTH + LEGENDWIDTH, height = PNGHEIGHT, pointsize = PS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1232 layout(matrix(1:2, ncol = 2), width = c(10, 3))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1233 plotg(GL$G,GL$L, col = colors[species])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1234 par(mar = c(0, 0, 0, 0))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1235 plot.new()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1236 legend("topleft", col=colors,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1237 legend = names(colors),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1238 pch = 15, cex = 0.7)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1239 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1240 HTML.title("comparative analysis:", HR=4, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1241 html_insert_image(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1242 img_file = fs_relative$graph_comparative,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1243 htmlfile = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1244 ## comparative analysis - calculate statistics:
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1245 V1V2 = substring(get.edgelist(GL$G),1, nchar(comp_codes[1]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1246 C_observed = table(data.frame(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1247 V1=factor(V1V2[,1],levels = comp_codes),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1248 V2=factor(V1V2[,2],levels = comp_codes)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1249 ))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1250 C_observed = as.data.frame.matrix(C_observed + t(C_observed))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1251 P_observed = as.data.frame.matrix(C_observed/sum(C_observed))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1252 Edge_propotions = table(factor(V1V2, levels = comp_codes))/(length(V1V2))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1253 P_expected = matrix(Edge_propotions,ncol=1) %*% matrix(Edge_propotions, nrow=1)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1254
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1255 spec_counts = df2html(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1256 data.frame(table(factor(species, levels=comp_codes))),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1257 header = c("Species", "Read count")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1258 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1259
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1260 edge_count = df2html(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1261 data.frame(species = comp_codes, (C_observed/2)[,]),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1262 header = c("", comp_codes)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1263 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1264
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1265 P_OE = df2html(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1266 data.frame(species=comp_codes,(P_observed/P_expected[,])),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1267 header = c("", comp_codes)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1268 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1269
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1270 HTML.title("Comparative analysis - species read counts:", HR =5, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1271 cat(spec_counts,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1272 file = clinfo$html_report_main, append =TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1273
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1274 # show connection only if more than one species in cluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1275 if (length(unique(species))>1){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1276 HTML.title("comparative analysis - number of edges between species:",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1277 HR =5, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1278 cat(edge_count,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1279 file = clinfo$html_report_main, append = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1280
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1281 HTML.title("comparative analysis - observed/expected number of edges between species",
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1282 HR =5, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1283 cat(P_OE,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1284 file = clinfo$html_report_main, append = TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1285 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1286
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1287 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1288
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1289
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1290 ## create image with highlighted domains
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1291 png(fs$graph_domains, width = PNGWIDTH + LEGENDWIDTH, height = PNGHEIGHT, pointsize = PS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1292 layout(matrix(1:2, ncol = 2), width = c(10, 3))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1293 plotg(GL$G,GL$L, col = vertex_colors$color_table)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1294 domains_detected = length(vertex_colors$legend) > 0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1295 par(mar = c(0, 0, 0, 0))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1296 plot.new()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1297 if (domains_detected){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1298 # domains found
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1299 legend("topleft", col=vertex_colors$legend,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1300 legend = names(vertex_colors$legend),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1301 pch = 15, cex = 0.7)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1302 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1303 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1304
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1305 HTML.title("protein domains:", HR=4, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1306 if (!domains_detected){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1307 HTML("No protein domains detected", file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1308 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1309 HTML("protein domains:", HR=4, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1310 html_insert_image(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1311 img_file = fs_relative$graph_domains,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1312 htmlfile = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1313
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1314 #############################################################################
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1315 if (nrow(annot_summary) == 0){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1316 HTML.title("Reads annotation summary", HR = 3, file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1317 HTML("No similarity hits to repeat databases found", file = clinfo$html_report_main)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1318 }else{
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1319 HTML(annot_summary, file = clinfo$html_report_main, align = "left")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1320 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1321
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1322
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1323 ## similarity and mate cluster
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1324 mate_clusters = get_cluster_connection_info(index, search_type="pair")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1325 similar_clusters = get_cluster_connection_info(index, search_type="similarity")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1326 ## report mate and similarity clusters
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1327 if (!is.null(similar_clusters)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1328 HTML.title("clusters with similarity:", file =clinfo$html_report_main, HR = 3)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1329 cat(df2html(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1330 similar_clusters,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1331 header=c("Cluster","Number of similarity hits"),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1332 sort_col = "N", scroling = TRUE
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1333 ),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1334 file =clinfo$html_report_main, append=TRUE)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1335 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1336 if (!is.null(mate_clusters)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1337 HTML.title("clusters connected through mates:", file =clinfo$html_report_main, HR = 3)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1338 cat(df2html(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1339 mate_clusters[,c('cl','N','k')],
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1340 header = c('Cluster','Number of shared<br> read pairs','k'),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1341 sort_col = "N", scroling = TRUE
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1342 ),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1343 file = clinfo$html_report_main,append = TRUE
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1344 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1345
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1346 ## create base graph images - it will serve as background for
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1347 ## mate clusters plots
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1348 png(fs$graph_base,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1349 width = PNGWIDTH, height = PNGHEIGHT, pointsize = PS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1350 par(mar=c(0,0,0,0),xaxs="i",yaxs="i")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1351 plotg(GL$G,GL$L, col = "#00000050")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1352 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1353 ## load base as raster image
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1354 base_image = readPNG(fs$graph_base)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1355
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1356 for (i in order(mate_clusters$N, decreasing = TRUE)){
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1357 mate_ids = unlist(strsplit(mate_clusters$ids[[i]],split=","))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1358 ## print only graph above MAX_N mates
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1359 if (length(mate_ids) < MAX_N){ # TODO - use constant
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1360 next
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1361 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1362 png(sprintf(fs$graph_mates,index, mate_clusters$cl[i]),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1363 width = PNGWIDTH, height = PNGHEIGHT, pointsize = PS)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1364 color_mate = gsub(".$","",V(GL$G)$name) %in% mate_ids %>%
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1365 ifelse("#FF0000FF", "#000000AA")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1366 par(mar=c(0,0,0,0),xaxs="i",yaxs="i")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1367 plot(range(GL$L[,1]), range(GL$L[,2]), type = "n", xlab = "", ylab = "", axes = FALSE,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1368 main = paste0("CL",index," ----> CL",mate_clusters$cl[i]))
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1369 rasterImage(base_image,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1370 range(GL$L[,1])[1], range(GL$L[,2])[1],
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1371 range(GL$L[,1])[2], range(GL$L[,2])[2]
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1372 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1373 points(GL$L[,1:2], col = color_mate,pch=18,cex=.8)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1374 dev.off()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1375 title = paste0("CL",index," ----> CL",mate_clusters$cl[i])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1376 footer = paste0("No. of shared pairs: :", mate_clusters$N[i])
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1377 html_insert_floating_image(
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1378 img_file = sprintf(fs_relative$graph_mates, index, mate_clusters$cl[i]),
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1379 htmlfile = clinfo$html_report_main, width = 200,
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1380 title = title, footer = footer
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1381 )
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1382 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1383 }
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1384 }