Mercurial > repos > petrn > repeatexplorer
comparison lib/tarean_output_help.html @ 0:f6ebec6e235e draft
Uploaded
| author | petrn | 
|---|---|
| date | Thu, 19 Dec 2019 13:46:43 +0000 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:f6ebec6e235e | 
|---|---|
| 1 <?xml version="1.0" encoding="utf-8"?> | |
| 2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
| 3 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
| 4 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> | |
| 5 <head> | |
| 6 <!-- 2016-10-21 Pá 11:06 --> | |
| 7 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> | |
| 8 <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| 9 <title>TAREAN output description</title> | |
| 10 <meta name="generator" content="Org-mode" /> | |
| 11 <meta name="author" content="petr" /> | |
| 12 <style type="text/css"> | |
| 13 <!--/*--><![CDATA[/*><!--*/ | |
| 14 .title { text-align: center; | |
| 15 margin-bottom: .2em; } | |
| 16 .subtitle { text-align: center; | |
| 17 font-size: medium; | |
| 18 font-weight: bold; | |
| 19 margin-top:0; } | |
| 20 .todo { font-family: monospace; color: red; } | |
| 21 .done { font-family: monospace; color: green; } | |
| 22 .priority { font-family: monospace; color: orange; } | |
| 23 .tag { background-color: #eee; font-family: monospace; | |
| 24 padding: 2px; font-size: 80%; font-weight: normal; } | |
| 25 .timestamp { color: #bebebe; } | |
| 26 .timestamp-kwd { color: #5f9ea0; } | |
| 27 .org-right { margin-left: auto; margin-right: 0px; text-align: right; } | |
| 28 .org-left { margin-left: 0px; margin-right: auto; text-align: left; } | |
| 29 .org-center { margin-left: auto; margin-right: auto; text-align: center; } | |
| 30 .underline { text-decoration: underline; } | |
| 31 #postamble p, #preamble p { font-size: 90%; margin: .2em; } | |
| 32 p.verse { margin-left: 3%; } | |
| 33 pre { | |
| 34 border: 1px solid #ccc; | |
| 35 box-shadow: 3px 3px 3px #eee; | |
| 36 padding: 8pt; | |
| 37 font-family: monospace; | |
| 38 overflow: auto; | |
| 39 margin: 1.2em; | |
| 40 } | |
| 41 pre.src { | |
| 42 position: relative; | |
| 43 overflow: visible; | |
| 44 padding-top: 1.2em; | |
| 45 } | |
| 46 pre.src:before { | |
| 47 display: none; | |
| 48 position: absolute; | |
| 49 background-color: white; | |
| 50 top: -10px; | |
| 51 right: 10px; | |
| 52 padding: 3px; | |
| 53 border: 1px solid black; | |
| 54 } | |
| 55 pre.src:hover:before { display: inline;} | |
| 56 pre.src-sh:before { content: 'sh'; } | |
| 57 pre.src-bash:before { content: 'sh'; } | |
| 58 pre.src-emacs-lisp:before { content: 'Emacs Lisp'; } | |
| 59 pre.src-R:before { content: 'R'; } | |
| 60 pre.src-perl:before { content: 'Perl'; } | |
| 61 pre.src-java:before { content: 'Java'; } | |
| 62 pre.src-sql:before { content: 'SQL'; } | |
| 63 | |
| 64 table { border-collapse:collapse; } | |
| 65 caption.t-above { caption-side: top; } | |
| 66 caption.t-bottom { caption-side: bottom; } | |
| 67 td, th { vertical-align:top; } | |
| 68 th.org-right { text-align: center; } | |
| 69 th.org-left { text-align: center; } | |
| 70 th.org-center { text-align: center; } | |
| 71 td.org-right { text-align: right; } | |
| 72 td.org-left { text-align: left; } | |
| 73 td.org-center { text-align: center; } | |
| 74 dt { font-weight: bold; } | |
| 75 .footpara { display: inline; } | |
| 76 .footdef { margin-bottom: 1em; } | |
| 77 .figure { padding: 1em; } | |
| 78 .figure p { text-align: center; } | |
| 79 .inlinetask { | |
| 80 padding: 10px; | |
| 81 border: 2px solid gray; | |
| 82 margin: 10px; | |
| 83 background: #ffffcc; | |
| 84 } | |
| 85 #org-div-home-and-up | |
| 86 { text-align: right; font-size: 70%; white-space: nowrap; } | |
| 87 textarea { overflow-x: auto; } | |
| 88 .linenr { font-size: smaller } | |
| 89 .code-highlighted { background-color: #ffff00; } | |
| 90 .org-info-js_info-navigation { border-style: none; } | |
| 91 #org-info-js_console-label | |
| 92 { font-size: 10px; font-weight: bold; white-space: nowrap; } | |
| 93 .org-info-js_search-highlight | |
| 94 { background-color: #ffff00; color: #000000; font-weight: bold; } | |
| 95 /*]]>*/--> | |
| 96 </style> | |
| 97 <link rel="stylesheet" type="text/css" href="style1.css" /> | |
| 98 <script type="text/javascript"> | |
| 99 /* | |
| 100 @licstart The following is the entire license notice for the | |
| 101 JavaScript code in this tag. | |
| 102 | |
| 103 Copyright (C) 2012-2013 Free Software Foundation, Inc. | |
| 104 | |
| 105 The JavaScript code in this tag is free software: you can | |
| 106 redistribute it and/or modify it under the terms of the GNU | |
| 107 General Public License (GNU GPL) as published by the Free Software | |
| 108 Foundation, either version 3 of the License, or (at your option) | |
| 109 any later version. The code is distributed WITHOUT ANY WARRANTY; | |
| 110 without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 111 FOR A PARTICULAR PURPOSE. See the GNU GPL for more details. | |
| 112 | |
| 113 As additional permission under GNU GPL version 3 section 7, you | |
| 114 may distribute non-source (e.g., minimized or compacted) forms of | |
| 115 that code without the copy of the GNU GPL normally required by | |
| 116 section 4, provided you include this license notice and a URL | |
| 117 through which recipients can access the Corresponding Source. | |
| 118 | |
| 119 | |
| 120 @licend The above is the entire license notice | |
| 121 for the JavaScript code in this tag. | |
| 122 */ | |
| 123 <!--/*--><![CDATA[/*><!--*/ | |
| 124 function CodeHighlightOn(elem, id) | |
| 125 { | |
| 126 var target = document.getElementById(id); | |
| 127 if(null != target) { | |
| 128 elem.cacheClassElem = elem.className; | |
| 129 elem.cacheClassTarget = target.className; | |
| 130 target.className = "code-highlighted"; | |
| 131 elem.className = "code-highlighted"; | |
| 132 } | |
| 133 } | |
| 134 function CodeHighlightOff(elem, id) | |
| 135 { | |
| 136 var target = document.getElementById(id); | |
| 137 if(elem.cacheClassElem) | |
| 138 elem.className = elem.cacheClassElem; | |
| 139 if(elem.cacheClassTarget) | |
| 140 target.className = elem.cacheClassTarget; | |
| 141 } | |
| 142 /*]]>*///--> | |
| 143 </script> | |
| 144 </head> | |
| 145 <body> | |
| 146 <div id="content"> | |
| 147 <h1 class="title">TAREAN output description</h1> | |
| 148 <div id="table-of-contents"> | |
| 149 <h2>Table of Contents</h2> | |
| 150 <div id="text-table-of-contents"> | |
| 151 <ul> | |
| 152 <li><a href="#orgheadline1">1. Introduction</a></li> | |
| 153 <li><a href="#orgheadline3">2. Main HTML report</a> | |
| 154 <ul> | |
| 155 <li><a href="#orgheadline2">2.1. Table legend</a></li> | |
| 156 </ul> | |
| 157 </li> | |
| 158 <li><a href="#orgheadline5">3. Detailed cluster report</a> | |
| 159 <ul> | |
| 160 <li><a href="#orgheadline4">3.1. Table legend</a></li> | |
| 161 </ul> | |
| 162 </li> | |
| 163 <li><a href="#orgheadline7">4. Structure of the output archive</a> | |
| 164 <ul> | |
| 165 <li><a href="#orgheadline6">4.1. structure of cluster directories</a></li> | |
| 166 </ul> | |
| 167 </li> | |
| 168 </ul> | |
| 169 </div> | |
| 170 </div> | |
| 171 | |
| 172 <div id="outline-container-orgheadline1" class="outline-2"> | |
| 173 <h2 id="orgheadline1"><span class="section-number-2">1</span> Introduction</h2> | |
| 174 <div class="outline-text-2" id="text-1"> | |
| 175 <p> | |
| 176 TAREAN output includes <b>HTML report</b> with list of all analyzed clusters; the clusters are classified into five categories: | |
| 177 </p> | |
| 178 <ul class="org-ul"> | |
| 179 <li>high confidence satellites</li> | |
| 180 <li>low confidence satellites</li> | |
| 181 <li>potential LTR elements</li> | |
| 182 <li>rDNA</li> | |
| 183 <li>other clusters</li> | |
| 184 </ul> | |
| 185 <p> | |
| 186 Each cluster for which consensus sequences were reconstructed also has its own detailed report, linked to the main report. | |
| 187 </p> | |
| 188 </div> | |
| 189 </div> | |
| 190 | |
| 191 <div id="outline-container-orgheadline3" class="outline-2"> | |
| 192 <h2 id="orgheadline3"><span class="section-number-2">2</span> Main HTML report</h2> | |
| 193 <div class="outline-text-2" id="text-2"> | |
| 194 <p> | |
| 195 This report contains basic information about all clusters larger than the specified threshold (default value is 0.01% of analyzed reads). | |
| 196 </p> | |
| 197 </div> | |
| 198 <div id="outline-container-orgheadline2" class="outline-3"> | |
| 199 <h3 id="orgheadline2"><span class="section-number-3">2.1</span> Table legend</h3> | |
| 200 <div class="outline-text-3" id="text-2-1"> | |
| 201 <dl class="org-dl"> | |
| 202 <dt>Cluster</dt><dd>Cluster identifier</dd> | |
| 203 <dt>Genome Proportion<code>[%]</code></dt><dd><i>(Number of sequences in cluster/Number of sequences in clustering) x 100%</i></dd> | |
| 204 <dt>Size</dt><dd>Number of reads in the cluster</dd> | |
| 205 <dt>Satellite probability</dt><dd>Empirical probability estimate that cluster sequences | |
| 206 are derived from satellite repeat. This estimate is based on analysis of more | |
| 207 than xxx clusters including yyy manually annotated and zzz experimentally | |
| 208 validated satellite repeats</dd> | |
| 209 <dt>Consensus</dt><dd>Consensus sequence is outcome of kmer-based | |
| 210 analysis and represents the most probable satellite monomer | |
| 211 sequence</dd> | |
| 212 <dt>Kmer analysis</dt><dd>link to analysis report for individual clusters</dd> | |
| 213 <dt>Graph layout</dt><dd>Graph-based visualization of similarities among sequence | |
| 214 reads</dd> | |
| 215 <dt>Connected component index</dt><dd>Proportion of nodes of the graph which are part | |
| 216 of the largest strongly connected component</dd> | |
| 217 <dt>Pair completeness index</dt><dd>Proportion of reads with available | |
| 218 mate-pair within the same cluster</dd> | |
| 219 <dt>Kmer coverage</dt><dd>Sum of relative frequencies of all kmers used for consensus | |
| 220 sequence reconstruction</dd> | |
| 221 <dt>|V|</dt><dd>Number of vertices of the graph</dd> | |
| 222 <dt>|E|</dt><dd>Number of edges of the graph</dd> | |
| 223 <dt>PBS score</dt><dd>Primer binding site detection score</dd> | |
| 224 <dt>The longest ORF length</dt><dd>Length of the longest open reading frame found in | |
| 225 any of the possible six reading frames. Search was done on dimer of | |
| 226 consensus so ORFs can be longer than 'monomer' length</dd> | |
| 227 <dt>Similarity-based annotation</dt><dd>Annotation based on | |
| 228 similarity search using blastn/blastx against database of known | |
| 229 repeats.</dd> | |
| 230 </dl> | |
| 231 </div> | |
| 232 </div> | |
| 233 </div> | |
| 234 <div id="outline-container-orgheadline5" class="outline-2"> | |
| 235 <h2 id="orgheadline5"><span class="section-number-2">3</span> Detailed cluster report</h2> | |
| 236 <div class="outline-text-2" id="text-3"> | |
| 237 <p> | |
| 238 Cluster report includes a list of major monomer sequence variants reconstructed from the most frequent k-mers. The reconstructed consensus sequences are sorted based on their significance (that is, what proportion of k-mers they represent). | |
| 239 </p> | |
| 240 </div> | |
| 241 <div id="outline-container-orgheadline4" class="outline-3"> | |
| 242 <h3 id="orgheadline4"><span class="section-number-3">3.1</span> Table legend</h3> | |
| 243 <div class="outline-text-3" id="text-3-1"> | |
| 244 <dl class="org-dl"> | |
| 245 <dt>kmer</dt><dd>length of kmer used for consensus reconstruction.</dd> | |
| 246 <dt>variant</dt><dd>identifier of consensus variant.</dd> | |
| 247 <dt>total score</dt><dd>measure of significance of consensus variant. Score is calculated as a sum of weights of all k-mers used for consensus reconstruction.</dd> | |
| 248 <dt>monomer length</dt><dd>length of the consensus</dd> | |
| 249 <dt>consensus</dt><dd>consensus sequence without ambiguous bases.</dd> | |
| 250 <dt>graph image</dt><dd>part of de-Bruijn graph based on the abundant k-mers. Size of | |
| 251 vertices corresponds to k-mer frequencies. Paths in the graph which were used | |
| 252 for reconstruction of consensus sequences are gray colored.</dd> | |
| 253 <dt>logo image</dt><dd>consensus sequences shown as DNA logo. Height of letters corresponds to kmer frequencies. Logo images are linked to corresponding position probability matrices.</dd> | |
| 254 </dl> | |
| 255 </div> | |
| 256 </div> | |
| 257 </div> | |
| 258 | |
| 259 <div id="outline-container-orgheadline7" class="outline-2"> | |
| 260 <h2 id="orgheadline7"><span class="section-number-2">4</span> Structure of the output archive</h2> | |
| 261 <div class="outline-text-2" id="text-4"> | |
| 262 <p> | |
| 263 Complete results from TAREAN analysis can be downloaded as a zip archive which contains the following | |
| 264 files and directories: | |
| 265 </p> | |
| 266 | |
| 267 <div class="org-src-container"> | |
| 268 | |
| 269 <pre class="src src-files">. | |
| 270 . | |
| 271 ├── clusters_info.csv <------------ list of clusters in tab delimited format | |
| 272 ├── index.html <------------ main html report | |
| 273 ├── seqclust | |
| 274 │ ├── assembly # not implemented yet | |
| 275 │ ├── blastn <------------ results of read comparison with DNA database | |
| 276 │ ├── blastx <------------ results of read comparison with protein database | |
| 277 │ ├── clustering | |
| 278 │ │ ├── clusters | |
| 279 │ │ │ ├── dir_CL0001 <----┐- detailed information about clusters | |
| 280 │ │ │ ├── dir_CL0002 <----│ | |
| 281 │ │ │ ├── dir_CL0003 <----│ | |
| 282 │ │ │ .... <----┘ | |
| 283 │ │ │ | |
| 284 │ │ └── hitsort.cls <--------- list of reads in individual clusters | |
| 285 │ ├── mgblast | |
| 286 │ ├── prerun | |
| 287 │ └── sequences <--------- input reads | |
| 288 ├── summary # not implemented yet | |
| 289 ├── TR_consensus_rank_1_.fasta <-- reconstructed monomer sequences for HIGH confidence satellites | |
| 290 ├── TR_consensus_rank_2_.fasta <-- reconstructed monomer sequences for LOW confidence satellites | |
| 291 ├── TR_consensus_rank_3_.fasta <-- reconstructed sequences of potential LTR elements | |
| 292 └── TR_consensus_rank_4_.fasta <-- reconstructed consensus for rDNA | |
| 293 </pre> | |
| 294 </div> | |
| 295 | |
| 296 <p> | |
| 297 List of all clusters which is available in HTML file <code>index.html</code> is also | |
| 298 available in tab delimited format in the file <code>clusters_info.csv</code> which can be | |
| 299 easily viewed and edited in spreadsheet editing programs. List of all clusters | |
| 300 and the corresponding reads is in the file <code>hitsort.cls</code> which has the following | |
| 301 format: | |
| 302 </p> | |
| 303 | |
| 304 <pre class="example"> | |
| 305 >CL1 11 | |
| 306 134234r 55494f 85525f 136746r 96742f 91926f 239729r 105445f 222518r 136402r 9013 | |
| 307 >CL2 10 | |
| 308 76205r 120735r 69527r 12235r 176778f 189307f 131952f 163507f 100038r 178475r | |
| 309 >CL3 6 | |
| 310 99835r 222598f 29715r 102023f 99524r 30116f | |
| 311 >CL4 6 | |
| 312 51723r 69073r 218774r 146425f 136314r 41744f | |
| 313 >CL5 5 | |
| 314 70686f 65565f 234078r 50430r 68247r | |
| 315 </pre> | |
| 316 | |
| 317 <p> | |
| 318 where <code>CL1 11</code> is the cluster ID followed by number of reads in the cluster; | |
| 319 next line contains list of all read names belonging to the cluster. | |
| 320 </p> | |
| 321 </div> | |
| 322 <div id="outline-container-orgheadline6" class="outline-3"> | |
| 323 <h3 id="orgheadline6"><span class="section-number-3">4.1</span> structure of cluster directories</h3> | |
| 324 <div class="outline-text-3" id="text-4-1"> | |
| 325 <p> | |
| 326 Detailed information for each cluster is stored in subdirectories: | |
| 327 </p> | |
| 328 | |
| 329 <div class="org-src-container"> | |
| 330 | |
| 331 <pre class="src src-folder">dir_CL0011 | |
| 332 ├── blast.csv <------------tab delimited file, all-to-all comparison of reads within cluster | |
| 333 ├── CL11_directed_graph.RData <----directed graph representation of cluster saved as R igraph object | |
| 334 ├── CL11.GL <-----------------undirected graph representation of cluster saved as R igraph object | |
| 335 ├── CL11.png <-----------┐- images with graph visualization | |
| 336 ├── CL11_tmb.png <-----------┘ | |
| 337 ├── dna_database_annotation.csv <-- annotation of cluster reads based on the DNA database of repeats | |
| 338 ├── reads_all.fas <---------------- all reads included in the cluster in fasta format | |
| 339 ├── reads.fas <---------------- subset of reads used for monomer reconstruction | |
| 340 ├── reads_oriented.fas <------------ subset of reads all in the same orientation | |
| 341 └── tarean | |
| 342 ├── consensus.fasta <----------- fasta file with tandem repeat consensus variants | |
| 343 ├── ggmin.RData | |
| 344 ├── img | |
| 345 │ ├── graph_11mer_1.png <-----┐ | |
| 346 │ ├── graph_11mer_2.png <-----│ | |
| 347 │ ├── graph_15mer_2.png <-----│ | |
| 348 │ ├── graph_15mer_3.png <-----│ | |
| 349 │ ├── graph_15mer_4.png <-----│ images of kmer-based graphs used for reconstruction of | |
| 350 │ ├── graph_19mer_2.png <-----│ monomer variants | |
| 351 │ ├── graph_19mer_4.png <-----│ | |
| 352 │ ├── graph_19mer_5.png <-----│ | |
| 353 │ ├── graph_23mer_2.png <-----│ | |
| 354 │ ├── graph_27mer_3.png <-----┘ | |
| 355 │ │ | |
| 356 │ ├── logo_11mer_1.png <-----┐ | |
| 357 │ ├── logo_11mer_2.png <-----│ | |
| 358 │ ├── logo_15mer_2.png <-----│ | |
| 359 │ ├── logo_15mer_3.png <-----│ | |
| 360 │ ├── logo_15mer_4.png <-----│ images with DNA logos representing consensus sequences | |
| 361 │ ├── logo_19mer_2.png <-----│ of monomer variants | |
| 362 │ ├── logo_19mer_4.png <-----│ | |
| 363 │ ├── logo_19mer_5.png <-----│ | |
| 364 │ ├── logo_23mer_2.png <-----│ | |
| 365 │ └── logo_27mer_3.png <-----┘ | |
| 366 │ | |
| 367 ├── ppm_11mer_1.csv <-----┐ | |
| 368 ├── ppm_11mer_2.csv <-----│ | |
| 369 ├── ppm_15mer_2.csv <-----│ | |
| 370 ├── ppm_15mer_3.csv <-----│ | |
| 371 ├── ppm_15mer_4.csv <-----│ position probability matrices for individual monomer | |
| 372 ├── ppm_19mer_2.csv <-----│ variants derived from k-mer frequencies | |
| 373 ├── ppm_19mer_4.csv <-----│ | |
| 374 ├── ppm_19mer_5.csv <-----│ | |
| 375 ├── ppm_23mer_2.csv <-----│ | |
| 376 ├── ppm_27mer_3.csv <-----┘ | |
| 377 │ | |
| 378 ├── reads_oriented.fas_11.kmers <-----┐ | |
| 379 ├── reads_oriented.fas_15.kmers <-----│ | |
| 380 ├── reads_oriented.fas_19.kmers <-----│ k-mer frequencies calculated on oriented reads | |
| 381 ├── reads_oriented.fas_23.kmers <-----│ for k-mer lengths 11 - 27 | |
| 382 ├── reads_oriented.fas_27.kmers <-----┘ | |
| 383 ├── reads_oriented.fasblast_out.cvs <---------┐results of blastn search against database of tRNA | |
| 384 ├── reads_oriented.fasblast_out.cvs_L.csv <----│for purposes of LTR detection | |
| 385 ├── reads_oriented.fasblast_out.cvs_R.csv <----┘ | |
| 386 └── report.html <--- cluster analysis HTML summary | |
| 387 </pre> | |
| 388 </div> | |
| 389 </div> | |
| 390 </div> | |
| 391 </div> | |
| 392 </div> | |
| 393 <div id="postamble" class="status"> | |
| 394 <p class="author">Author: petr</p> | |
| 395 <p class="date">Created: 2016-10-21 Pá 11:06</p> | |
| 396 <p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p> | |
| 397 </div> | |
| 398 </body> | |
| 399 </html> | 
