annotate lib/reporting.R @ 6:2925751ed586 draft

Uploaded
author petrn
date Fri, 20 Dec 2019 12:59:39 +0000
parents f6ebec6e235e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
1 #!/usr/bin/env Rscript
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
2 library(R2HTML)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
3 library(hwriter)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
4 library(DT)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
5 library(tools)
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
6
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
7 source("htmlheader.R")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
8 source("config.R") # load TANDEM_RANKS
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
9 source("utils.R")
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
## Paging options handed to DT::datatable() for the cluster report table.
DT_OPTIONS <- list(
  pageLength = 1000,
  lengthMenu = c(10, 50, 100, 1000, 5000, 10000)
)

## HTML headers for the two generated pages: the PAGE_TITLE placeholder of the
## `htmlheader` template (provided by htmlheader.R) is filled in per page.
HTMLHEADER_TAREAN <- gsub("PAGE_TITLE", "TAREAN summary", htmlheader)
HTMLHEADER_INDEX <- gsub("PAGE_TITLE", "Clustering summary", htmlheader)

## Working directory at load time; used to locate the script directory when
## run from Rserve.
WD <- getwd()
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
15
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
## Rename the columns of a report data frame to human-readable HTML headers.
##
## Known internal column names are replaced by their display labels, a leading
## "TR_" marker is stripped from any remaining names, every underscore is turned
## into an HTML line break ("<br>") and the first letter is upper-cased.
##
## df      - data frame whose column names should be reformatted
## returns - the same data frame with reformatted column names
reformat_header <- function(df) {
  ## internal column name -> label shown in the HTML tables
  header_labels <- c(
    "TR_score" = "TAREAN k-mer_coverage",
    "vcount" = "|V|",
    "ecount" = "|E|",
    "Genome_Proportion[%]" = "Proportion[%]",
    "Proportion_Adjusted[%]" = "Proportion adjusted[%]",
    "supercluster" = "Super_cluster",
    "size_real" = "Number of reads",
    "TR_monomer_length" = "Consensus_length",
    "TR_consensus" = "Consensus",
    "pbs_score" = "PBS score",
    "ltr_detection" = "LTR detection",
    "kmer_analysis" = "TAREAN k-mer analysis",
    "annotations_summary" = "Similarity_hits_[above 0.1%]",
    "annotations_summary_custom" = "Similarity_hits_to_custom_database",
    "loop_index" = "connected_component_index C",
    "pair_completeness" = "pair_completeness_index_P"
  )
  H <- colnames(df)
  known <- H %in% names(header_labels)
  H[known] <- unname(header_labels[H[known]])
  ## The "TR_" marker must be removed BEFORE underscores are converted to
  ## "<br>"; in the reverse order the pattern can never match.
  H <- gsub("TR_", "", H)
  H <- gsub("_", "<br>", H)
  H <- capitalize(H)
  colnames(df) <- H
  return(df)
}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
42
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
## Prepare the values of a data frame for HTML output.
##
## Character columns get their newlines replaced by "<br>"; numeric (double)
## columns are rounded to 3 significant digits. All other columns - including
## integer columns and columns carrying several classes (e.g. POSIXct) - are
## left untouched.
##
## df      - data frame to be formatted
## returns - data frame of the same shape with formatted columns
reformat4html <- function(df) {
  for (n in colnames(df)) {
    column <- df[, n]
    ## inherits() instead of `class(x) == "..."`: class() may return more than
    ## one element, which makes the comparison an invalid `if` condition.
    if (inherits(column, "character")) {
      df[, n] <- gsub("\n", "<br>", column)
    } else if (inherits(column, "numeric")) {
      df[, n] <- signif(column, 3)
    }
  }
  return(df)
}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
54
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
## Upper-case the first character of every string in `s`; the remainder of
## each string is returned unchanged.
capitalize <- function(s) {
  head_char <- toupper(substring(s, 1, 1))
  tail_chars <- substring(s, 2)
  paste0(head_char, tail_chars)
}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
60
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
61
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
## Create the main HTML index, the TAREAN report, the cluster annotation table
## (DT widget) and the CSV summaries. Index and TAREAN reports are always
## created; cluster/supercluster links are added only when `tarean_mode` is
## FALSE.
##
## paths            - nested list of input/output paths (flattened with
##                    nested2named_list); the keys used are visible below
## N_clustering     - number of analyzed reads; denominator of all proportions
## N_input          - number of input reads (reported only)
## N_omit           - number of reads removed by filtering of abundant
##                    satellites; reported when non-zero
## merge_threshold  - 0 is reported as "Cluster merging: No", anything else "Yes"
## paired           - logical, reported as "Paired-end reads"
## consensus_files  - list of consensus fasta files, indexed by tandem rank
## custom_db        - logical; adds the custom database annotation column
## tarean_mode      - logical; TRUE suppresses links to cluster reports
## HTML_LINKS       - nested list of relative links between the HTML pages
## pipeline_version_info, max_memory, max_number_reads_for_clustering, mincln
##                  - run details printed in the "Details" section
##
## Relies on objects defined outside this function: HITSORTDB and SEQDB
## (presumably set up by connect_to_databases - confirm), TANDEM_RANKS,
## RANKS_TANDEM, WD and the HTML header constants.
create_main_reports <- function(paths, N_clustering, N_input, N_omit, merge_threshold,
                                paired, consensus_files, custom_db, tarean_mode,
                                HTML_LINKS, pipeline_version_info, max_memory,
                                max_number_reads_for_clustering, mincln) {
  ## extract all paths and directories
  HTML_LINKS <- nested2named_list(HTML_LINKS)
  paths <- nested2named_list(paths)
  csvfile <- paths[["clusters_info"]]
  clusters_summary_csv <- paths[["clusters_summary_csv"]]
  profrep_classification_csv <- paths[["profrep_classification_csv"]]
  htmlfile <- paths[["tarean_report_html"]]
  html_report_dt <- paths[["cluster_report_html"]]
  main_report <- paths[["main_report_html"]]
  libdir <- paths[["libdir"]]
  clusters_dir <- paths[["clusters__relative"]]
  superclusters_dir <- paths[["superclusters__relative"]]
  seqdb <- paths[["sequences_db"]]
  hitsortdb <- paths[["hitsort_db"]]
  connect_to_databases(seqdb, hitsortdb)
  dfraw <- read.table(csvfile, as.is = TRUE, header = TRUE, sep = "\t",
                      na.strings = c("None", "NA"))
  ## table must be updated
  dfraw$supercluster_best_hit <- dbGetQuery(
    HITSORTDB, "SELECT supercluster_best_hit FROM cluster_info")[, 1]

  ## columns to use
  selected_cols <- c("index", "size_real", "size_adjusted", "vcount", "ecount",
                     "loop_index", "pair_completeness",
                     "satellite_probability", "satellite",
                     "TR_score", "pbs_score", "ltr_detection", "TR_monomer_length",
                     "TR_consensus", "annotations_summary", "supercluster", "tandem_rank",
                     "supercluster_best_hit")

  ## some columns are added later (like Graph_layout, Cluster, ...)
  ## columns for the TAREAN html report
  selected_cols_tarean <- c(
    "Cluster",
    "Proportion[%]",
    "Proportion_Adjusted[%]",
    "size_real",
    "satellite_probability",
    "TR_monomer_length",
    "TR_consensus",
    "Graph_layout",
    "kmer_analysis",
    "loop_index",
    "pair_completeness",
    "TR_score",
    "vcount",
    "ecount",
    "pbs_score",
    "annotations_summary"
  )
  ## columns for the cluster annotation table
  selected_cols_main <- c(
    "Cluster",
    "supercluster",
    "Proportion[%]",
    "Proportion_Adjusted[%]",
    "size_real",
    "Graph_layout",
    "annotations_summary",
    "ltr_detection",
    "satellite_probability",
    "TAREAN_annotation",
    "TR_monomer_length",
    "TR_consensus",
    "kmer_analysis",
    "loop_index",
    "pair_completeness",
    "TR_score",
    "ecount",
    "vcount"
  )

  if (custom_db) {
    selected_cols_main <- c(selected_cols_main, "annotations_summary_custom")
    selected_cols_tarean <- c(selected_cols_tarean, "annotations_summary_custom")
    selected_cols <- c(selected_cols, "annotations_summary_custom")
  }
  if (is_comparative()) {
    prefix_codes <- dbGetQuery(SEQDB, "SELECT * FROM prefix_codes")
    species_counts <- dbGetQuery(HITSORTDB, "SELECT * FROM comparative_counts")
    superclusters <- dbGetQuery(HITSORTDB, paste(
      "SELECT supercluster, cluster FROM superclusters WHERE cluster <=",
      nrow(species_counts))
    )
    species_counts <- merge(superclusters, species_counts,
                            by.x = "cluster", by.y = "clusterindex")
    ## include commented header with total counts:
    cat("# Total counts:\t\t", paste(prefix_codes$N, collapse = "\t"), "\n#\n",
        sep = "",
        file = paths[["comparative_analysis_counts_csv"]])

    write.table(species_counts, file = paths[["comparative_analysis_counts_csv"]],
                sep = "\t", col.names = TRUE, row.names = FALSE, append = TRUE)
    species_counts_formated <- apply(
      species_counts[, prefix_codes$prefix, drop = FALSE],
      1, function(x) paste(prefix_codes$prefix, ":", x, "\n", sep = "", collapse = ""))
    ## seq_len() instead of 1:nrow() so an empty cluster table yields an empty
    ## column instead of picking element 1
    dfraw$species_counts <- species_counts_formated[seq_len(nrow(dfraw))]
    selected_cols <- c(selected_cols, "species_counts")
    selected_cols_main <- c(selected_cols_main, "species_counts")
  }

  df_report <- dfraw[, selected_cols]
  ## describe tandem ranks:
  df_report$TAREAN_annotation <- RANKS_TANDEM[as.character(df_report$tandem_rank)]
  ## table for csv export, without Cluster_similarity_hits
  df_report_csv <- reformat_df_report(df_report)
  df_report_csv <- df_report_csv[, !colnames(df_report_csv) %in% "Cluster_similarity_hits"]
  df_report_csv$Final_annotation <- ""

  ## make table for profrep classification
  write.table(
    reformat_df_to_profrep_classification(df_report), file = profrep_classification_csv,
    sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)

  ## columns which exist only in the html reports
  df_report$"kmer_analysis" <- ifelse(dfraw$putative_tandem,
                                      hwrite("report", link = dfraw$html_tarean), "N/A")
  df_report$"Graph_layout" <- hwriteImage(dfraw$image_file_tmb,
                                          link = dfraw$image_file, table = FALSE)
  df_report$Cluster <- paste0("CL", df_report$index)
  df_report$"Proportion[%]" <- signif(100 * df_report$size_real / N_clustering, 2)
  df_report$"Proportion_Adjusted[%]" <- signif(100 * df_report$size_adjusted / N_clustering, 2)
  if (!tarean_mode) {
    ## full clustering run: link clusters and superclusters to their reports
    df_report$Cluster <- sapply(
      df_report$index,
      function(x) hwrite(x, link = sprintf("%s/dir_CL%04d/index.html", clusters_dir, x)))

    df_report$supercluster <- sapply(
      df_report$supercluster,
      function(x) hwrite(x, link = sprintf("%s/dir_SC%04d/index.html", superclusters_dir, x)))
  }

  ## TAREAN report
  ## copy stylesheet and documentation next to the report so it can link to them
  file.copy(paste0(WD, "/style1.css"), dirname(htmlfile))
  file.copy(paste0(WD, "/documentation.html"), dirname(htmlfile))

  tarean_html <- start_html(htmlfile, HTMLHEADER_TAREAN)
  tarean_html("Tandem Repeat Analyzer", HTML.title, HR = 1)
  ## NOTE(review): start_html is called a second time on the same file; if it
  ## re-creates the file, the title written above is lost - confirm intent.
  tarean_html <- start_html(htmlfile, HTMLHEADER_TAREAN)
  tarean_html("Run statistics:", HTML.title, HR = 2)
  tarean_html(paste("Number of input reads:", N_input))
  tarean_html(paste("Number of analyzed reads:", N_clustering))
  tarean_html(paste("Cluster merging:", ifelse(merge_threshold == 0, "No", "Yes")))

  ## export links to consensus sequences in fasta files
  tarean_html("Consensus files - fasta format:", HTML.title, HR = 2)
  ## NOTE(review): the rank VALUE `i` is also used as a positional index into
  ## TANDEM_RANKS and consensus_files - this assumes values equal positions;
  ## verify against config.R.
  for (i in TANDEM_RANKS[TANDEM_RANKS != 0]) { ## no consensus for rank 0
    if (!is.null(consensus_files[[i]])) {
      N <- sum(dfraw$tandem_rank == TANDEM_RANKS[i])
      name_string <- paste(names(TANDEM_RANKS)[i], " - total ", N, "found")
      tarean_html(paste("<p>",
                        hwrite(name_string, download = name_string,
                               link = basename(consensus_files[[i]][[1]])),
                        "<br>\n"))
    }
  }
  ## print link to documentation ##
  tarean_html("Documentation", HTML.title, HR = 2)
  tarean_html(paste('<p> For the explanation of TAREAN output see',
                    ' <a href="documentation.html#tra" > the help section </a> <p>'))

  ## PRINT TABLES WITH CLUSTERS - one table per tandem rank
  for (n in names(TANDEM_RANKS)) {
    tarean_html(n, HTML.title, HR = 2)
    ## %in% never yields NA, so clusters without a rank are simply not selected
    inc <- dfraw$tandem_rank %in% TANDEM_RANKS[n]
    if (any(inc)) {
      tarean_html(reformat4html(
        reformat_header(
          df_report[inc, selected_cols_tarean, drop = FALSE]
        )
      ),
      align = "left", digits = 3)
    } else {
      tarean_html("not found")
    }
  }

  ## export table with all clusters
  ## the file must exist before normalizePath() is called on it
  cat("", file = html_report_dt)

  DT_instance <- df_report[, selected_cols_main, drop = FALSE] %>%
    reformat_header %>% reformat4html %>% datatable(escape = FALSE, options = DT_OPTIONS) %>%
    ## style every column of the displayed table (built from selected_cols_main)
    formatStyle(columns = seq_along(selected_cols_main), "font-size" = "12px") %>%
    formatStyle(columns = "Similarity<br>hits<br>[above 0.1%]", "min-width" = "500px")

  saveWidget(DT_instance, file = normalizePath(html_report_dt),
             libdir = normalizePath(libdir), selfcontained = FALSE)

  add_preamble(normalizePath(html_report_dt),
               preamble = '<h2>Cluster annotation</h2> <p><a href="documentation.html#clust"> For table legend see documentation. </a> </p>')

  ## Main page - Clustering info - global information about clustering
  top_clusters_prop <- sum(df_report$size_real) / N_clustering
  clustering_info <- summary_histogram(fn = paths[["summary_histogram"]], N_clustering, N_omit,
                                       df_report$size_adjusted,
                                       top_clusters_prop)
  index_html <- start_html(main_report, HTMLHEADER_INDEX)
  index_html("Clustering Summary", HTML.title, HR = 1)

  index_html(paste0('<a href="',
                    paths[["summary_histogram__relative"]],
                    '"> <img src="', paths[["summary_histogram__relative"]],
                    '" width="700" border="1" >',
                    ' </a>'), cat)
  index_html('<p> <b> Graphical summary of the clustering results. </b> Bars represent superclusters, with their heights and widths corresponding to the numbers of reads in the superclusters (y-axis) and to their proportions in all analyzed reads (x-axis), respectively. Rectangles inside the supercluster bars represent individual clusters. If the filtering of abundant satellites was performed, the affected clusters are shown in green, and their sizes correspond to the adjusted values. Blue and pink background panels show proportions of reads that were clustered and remained single, respectively. Top clusters are on the left of the dotted line. </p><hr><br><br>', cat)

  index_html("Run information:", HTML.title, HR = 2)
  index_html(paste("Number of input reads:", N_input))
  index_html(paste("Number of analyzed reads:", N_clustering))
  if (N_omit != 0) {
    index_html(paste("Number of reads removed by automatic filtering of abundant putative satellites:", N_omit))
    index_html(paste("Number of remaining reads after filtering of abundant satellites:", N_clustering - N_omit))
  }

  index_html(
    paste(
      "Proportion of reads in top clusters :",
      signif(100 * sum(df_report$size_real) / N_clustering, 2),
      "%"
    ))
  index_html(paste("Cluster merging:", ifelse(merge_threshold == 0, "No", "Yes")))
  index_html(paste("Paired-end reads:", ifelse(paired, "Yes", "No")))
  index_html("Available analyses:", HTML.title, HR = 2)
  index_html(paste("<p>", hwrite("Tandem repeat analysis", link = HTML_LINKS$INDEX_TO_TAREAN), "</p>"), cat)

  if (!tarean_mode) {
    index_html(paste("<p>", hwrite("Cluster annotation", link = HTML_LINKS$INDEX_TO_CLUSTER_REPORT), "</p>"), cat)
    index_html(paste("<p>", hwrite("Supercluster annotation",
                                   link = HTML_LINKS$INDEX_TO_SUPERCLUSTER_REPORT), "</p>"), cat)
    index_html(paste("<p>", hwrite("Repeat annotation summary", link = HTML_LINKS$INDEX_TO_SUMMARIZED_ANNOTATION), "</p>"), cat)
  }

  if (is_comparative()) {
    ## Plotting is best effort: a failure is reported but must not abort the
    ## report. `plot_ok` records whether `imagemap` was actually produced so the
    ## code below never touches an undefined object.
    imagemap <- NULL
    plot_ok <- FALSE
    tryCatch({
      imagemap <- plot_rect_map(
        read_counts = paths[["comparative_analysis_counts_csv"]],
        cluster_annotation = paths[["profrep_classification_csv"]],
        output_file = paths[["comparative_summary_map"]]
      )
      plot_ok <- TRUE
    },
    error = function(err) {
      print(paste("error while plotting ", err))
    }
    )

    HTML.title("Comparative analysis - Total number of reads in clustering analysis", file = main_report)
    index_html(df2html(
      prefix_codes,
      header = c("Code", "Total read count"), rounding_function = round),
      cat
    )
    HTML.title("Comparative analysis - Number of reads in individual clusters", file = main_report)

    if (plot_ok) {
      index_html(paste0('<img src="', paths[["comparative_summary_map__relative"]],
                        '" usemap ="#clustermap" border="2">'), cat)

      index_html(
        "Bar plot on top shows the size of individual clusters. Size of the rectangles in lower panel is proportional to the number of reads in a cluster for each species. Clusters and species were sorted using hierarchical clustering. Bars and rectangles in the plot are hyperlinked to the individual cluster reports.")
      index_html(imagemap)
    } else {
      index_html("Comparative summary plot could not be generated.")
    }
  }

  how2cite <- readLines(paths[["how_to_cite"]])

  index_html(how2cite, cat, sep = "\n")
  index_html("<br><hr>", cat)
  index_html("Details:", HTML.title, HR = 3)
  index_html(pipeline_version_info %>% preformatted, cat)
  index_html(paste0("Minimal number of reads in cluster to be considered top cluster : ", mincln))
  index_html(paste0("Reserved Memory : ", round(max_memory / (1024 * 1024)), "G"))
  index_html(paste0("Maximum number of processable reads with the reserved memory : ", max_number_reads_for_clustering))

  ## export to csv: clustering summary first, then (after an empty line) the
  ## per-cluster table
  clustering_info$Number_of_analyzed_reads <- N_clustering
  write.table(t(as.data.frame(clustering_info)),
              file = clusters_summary_csv, sep = "\t", col.names = FALSE)
  cat("\n", file = clusters_summary_csv, append = TRUE)
  write.table(
    df_report_csv, file = clusters_summary_csv,
    sep = "\t", col.names = TRUE, row.names = FALSE, quote = TRUE, append = TRUE)
}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
343
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
# Placeholder function: prints a fixed message and (invisibly) returns it.
dummy_function = function(){
  placeholder_text = "dummy function"
  print(placeholder_text)
}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
347
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
# Build the concise per-cluster table intended for the CSV export.
#
# Args:
#   df_report: data frame with at least the columns index, supercluster,
#     size_real, size_adjusted, supercluster_best_hit, TAREAN_annotation and
#     annotations_summary. The column annotations_summary_custom is optional.
#     Any other columns (e.g. TR_consensus, tandem_rank) are ignored.
#
# Returns:
#   Data frame holding the selected columns in a fixed order, renamed to
#   Cluster, Supercluster, Size, Size_adjusted, Automatic_annotation,
#   TAREAN_annotation and Cluster_similarity_hits, followed by
#   annotations_summary_custom (name unchanged) when the input has it.
#
# Stops with an error when a required column is missing.
reformat_df_report = function(df_report){
  ## input column -> output column, listed in output order
  column_map = c(
    index                 = "Cluster",
    supercluster          = "Supercluster",
    size_real             = "Size",
    size_adjusted         = "Size_adjusted",
    supercluster_best_hit = "Automatic_annotation",
    TAREAN_annotation     = "TAREAN_annotation",
    annotations_summary   = "Cluster_similarity_hits"
  )
  ## custom annotation is exported under its original name, only if present
  if ("annotations_summary_custom" %in% colnames(df_report)){
    column_map = c(column_map,
                   annotations_summary_custom = "annotations_summary_custom")
  }
  missing_columns = setdiff(names(column_map), colnames(df_report))
  if (length(missing_columns) > 0){
    stop("df_report is missing required column(s): ",
         paste(missing_columns, collapse = ", "))
  }
  ## columns that are not exported are left untouched: they are dropped by
  ## the selection below, so no clean-up of them is needed
  df_out = df_report[, names(column_map), drop = FALSE]
  colnames(df_out) = unname(column_map)
  return(df_out)
}
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
378
f6ebec6e235e Uploaded
petrn
parents:
diff changeset
# Convert the per-cluster best-hit annotation into a two-column
# cluster/classification table.
#
# Args:
#   df_report: data frame with the columns index (cluster identifier) and
#     supercluster_best_hit ("/"-separated classification path).
#
# Returns:
#   Data frame with columns Cluster (copied from index) and classification
#   (character). In the classification the "/" level separator is replaced
#   by "|", selected lineage names get their "_" turned back into "/"
#   (e.g. Ty1_copia -> Ty1/copia), and the leading "All" level is removed.
#   Clusters whose classification ends up empty or NA are labelled
#   "unknown_CL<index>".
reformat_df_to_profrep_classification = function(df_report){
  CL = df_report$index
  best_hit = df_report$supercluster_best_hit
  ## literal replacements, applied in this order: the separator must be
  ## converted first so that the "/" introduced into lineage names afterwards
  ## is not mistaken for a level separator
  replacement = list(
    c("/", "|"),
    c("Ty1_copia", "Ty1/copia"),
    c("Ty3_gypsy", "Ty3/gypsy"),
    c("TatIV_Ogre", "TatIV/Ogre"),
    c("Ogre_Tat", "Ogre/Tat"),
    c("EnSpm_CACTA", "EnSpm/CACTA"),
    c("MuDR_Mutator", "MuDR/Mutator"),
    c("PIF_Harbinger", "PIF/Harbinger"),
    ## was c("Tc1/Mariner", "Tc1/Mariner"), which could never change anything;
    ## NOTE(review): pattern assumed to be "Tc1_Mariner" by analogy with the
    ## other entries - confirm against the classification names in use
    c("Tc1_Mariner", "Tc1/Mariner"),
    c("All|", "")
  )
  for (i in replacement){
    best_hit = gsub(i[1], i[2], best_hit, fixed = TRUE)
  }
  ## a hit consisting only of the root level has no trailing "|" to match above
  best_hit = gsub("^All", "", best_hit, fixed = FALSE)
  ## indexed assignment (instead of ifelse) keeps the column character for
  ## zero-row input and also labels NA hits
  unknown = is.na(best_hit) | best_hit == ""
  best_hit[unknown] = paste0("unknown_CL", CL[unknown])
  output = data.frame(Cluster = CL, classification = best_hit, stringsAsFactors = FALSE)
  return(output)
}