diff BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h	Thu Nov 03 10:25:04 2011 -0400
@@ -0,0 +1,123 @@
+/*****************************************************************************
+  unionBedGraphs.h
+
+  (c) 2010 - Assaf Gordon, CSHL
+           - Aaron Quinlan, UVA
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef UNIONBEDGRAPHS_H
+#define UNIONBEDGRAPHS_H
+
+#include <vector>
+#include <string>
+#include "bedGraphFile.h"
+#include "genomeFile.h"
+#include "intervalItem.h"
+
+class UnionBedGraphs
+{
+private:
+    typedef BEDGRAPH_STR BEDGRAPH_TYPE;
+
+    vector<string>  filenames;
+    vector<string>  titles;
+
+    vector<BedGraphFile*>               bedgraph_files;
+    vector<BEDGRAPH_TYPE::DEPTH_TYPE>   current_depth;
+    vector<BEDGRAPH_TYPE>               current_bedgraph_item;
+
+    std::ostream    &output;
+
+    INTERVALS_PRIORITY_QUEUE queue;
+    std::string              current_chrom;
+    int                      current_non_zero_inputs;
+    bool                     print_empty_regions;
+
+    GenomeFile* genome_sizes;
+
+    std::string no_coverage_value;
+
+public:
+    UnionBedGraphs(std::ostream& _output,
+            const vector<string>& _filenames,
+            const vector<string>& _titles,
+            bool _print_empty_regions,
+            const std::string& _genomeFileName,
+            const std::string& _no_coverage_value);
+
+    virtual ~UnionBedGraphs();
+
+    // Combines all bedgraph files
+    void Union();
+
+    // Print the header line: chrom/start/end + name of each bedgraph file.
+    void PrintHeader();
+
+
+private:
+
+    // Open all BedGraph files, initialize "current_XXX" vectors
+    void OpenBedgraphFiles();
+
+    // Close the BedGraph files.
+    void CloseBedgraphFiles();
+
+    /*
+       Add an interval from BedGraph file 'index' into the queue.
+       will only be added if it belongs to the current chromosome.
+
+       If the interval was added (=consumed), the next interval will be read from the file
+       using 'LoadNextBedgraphItem'
+     */
+    void AddInterval(int index);
+
+    /*
+       Loads the next interval from BedGraph file 'index'.
+       Stores it in 'current_bedgraph_item' vector.
+     */
+    void LoadNextBedgraphItem(int index);
+
+    /*
+       Scans the 'current_bedgraph_item' vector,
+       find the 'first' chromosome to use (different BedGraph files can start with different chromosomes).
+     */
+    std::string DetermineNextChrom();
+
+    /*
+       Returns 'true' if ALL intervals from ALL BedGraph files were used
+    */
+    bool        AllFilesDone();
+
+    /*
+       Extract the next coordinate from the queue, and updates the current coverage information.
+       If multiple interval share the same coordinate values, all of them are handled.
+       If an END coordinate is consumed, the next interval (from the corresponding file) is read.
+     */
+    CHRPOS ConsumeNextCoordinate();
+
+    /*
+       Updates the coverage information based on the given item.
+       Item can be a START coordinate or an END coordiante.
+     */
+    void UpdateInformation(const IntervalItem &item);
+
+    /*
+       prints chrom/start/end and the current depth coverage values of all the files.
+     */
+    void PrintCoverage(CHRPOS start, CHRPOS end);
+
+    /*
+       prints chrom/start/end and the ZERO depth coverage values of all the files.
+     */
+    void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
+
+    void DebugPrintQueue();
+};
+
+
+#endif