diff BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h	Thu Nov 03 10:25:04 2011 -0400
@@ -0,0 +1,125 @@
+/*****************************************************************************
+  multiIntersectBed.h
+
+  (c) 2010 - Aaron Quinlan, UVA
+           - Assaf Gordon, CSHL
+  Quinlan Laboratory
+  Department of Public Health Sciences
+  Center for Public Health Genomics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef MULTIINTERSECTBED_H
+#define MULTIINTERSECTBED_H
+
+#include <vector>
+#include <string>
+#include "bedFile.h"
+#include "genomeFile.h"
+#include "intervalItem.h"
+
+class MultiIntersectBed
+{
+private:
+
+    vector<string>  filenames;
+    vector<string>  titles;
+
+    vector<BedFile*>   input_files;
+    vector<int>        current_depth;
+    vector<BED>        current_item;
+
+    std::ostream    &output;
+
+    INTERVALS_PRIORITY_QUEUE queue;
+    std::string              current_chrom;
+    map<int, bool>           files_with_coverage;
+    int                      current_non_zero_inputs;
+    bool                     print_empty_regions;
+    bool                     haveTitles;
+    
+    GenomeFile* genome_sizes;
+
+    std::string no_coverage_value;
+
+public:
+    MultiIntersectBed(std::ostream& _output,
+            const vector<string>& _filenames,
+            const vector<string>& _titles,
+            bool _print_empty_regions,
+            const std::string& _genomeFileName,
+            const std::string& _no_coverage_value);
+
+    virtual ~MultiIntersectBed();
+
+    // Combines all interval files
+    void MultiIntersect();
+
+    // Print the header line: chrom/start/end + name of each bedgraph file.
+    void PrintHeader();
+
+
+private:
+
+    // Open all input files, initialize "current_XXX" vectors
+    void OpenFiles();
+
+    // Close the input files.
+    void CloseFiles();
+
+    /*
+       Add an interval from BedGraph file 'index' into the queue.
+       will only be added if it belongs to the current chromosome.
+
+       If the interval was added (=consumed), the next interval will be read from the file
+       using 'LoadNextItem'
+     */
+    void AddInterval(int index);
+
+    /*
+       Loads the next interval from Bed file 'index'.
+       Stores it in 'current_bed_item' vector.
+     */
+    void LoadNextItem(int index);
+
+    /*
+       Scans the 'current_bedgraph_item' vector,
+       find the 'first' chromosome to use (different BedGraph files can start with different chromosomes).
+     */
+    std::string DetermineNextChrom();
+
+    /*
+       Returns 'true' if ALL intervals from ALL BedGraph files were used
+    */
+    bool        AllFilesDone();
+
+    /*
+       Extract the next coordinate from the queue, and updates the current coverage information.
+       If multiple interval share the same coordinate values, all of them are handled.
+       If an END coordinate is consumed, the next interval (from the corresponding file) is read.
+     */
+    CHRPOS ConsumeNextCoordinate();
+
+    /*
+       Updates the coverage information based on the given item.
+       Item can be a START coordinate or an END coordiante.
+     */
+    void UpdateInformation(const IntervalItem &item);
+
+    /*
+       prints chrom/start/end and the current depth coverage values of all the files.
+     */
+    void PrintCoverage(CHRPOS start, CHRPOS end);
+
+    /*
+       prints chrom/start/end and the ZERO depth coverage values of all the files.
+     */
+    void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
+
+    void DebugPrintQueue();
+};
+
+
+#endif