Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,125 @@ +/***************************************************************************** + multiIntersectBed.h + + (c) 2010 - Aaron Quinlan, UVA + - Assaf Gordon, CSHL + Quinlan Laboratory + Department of Public Health Sciences + Center for Public Health Genomics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#ifndef MULTIINTERSECTBED_H +#define MULTIINTERSECTBED_H + +#include <vector> +#include <string> +#include "bedFile.h" +#include "genomeFile.h" +#include "intervalItem.h" + +class MultiIntersectBed +{ +private: + + vector<string> filenames; + vector<string> titles; + + vector<BedFile*> input_files; + vector<int> current_depth; + vector<BED> current_item; + + std::ostream &output; + + INTERVALS_PRIORITY_QUEUE queue; + std::string current_chrom; + map<int, bool> files_with_coverage; + int current_non_zero_inputs; + bool print_empty_regions; + bool haveTitles; + + GenomeFile* genome_sizes; + + std::string no_coverage_value; + +public: + MultiIntersectBed(std::ostream& _output, + const vector<string>& _filenames, + const vector<string>& _titles, + bool _print_empty_regions, + const std::string& _genomeFileName, + const std::string& _no_coverage_value); + + virtual ~MultiIntersectBed(); + + // Combines all interval files + void MultiIntersect(); + + // Print the header line: chrom/start/end + name of each bedgraph file. + void PrintHeader(); + + +private: + + // Open all input files, initialize "current_XXX" vectors + void OpenFiles(); + + // Close the input files. + void CloseFiles(); + + /* + Add an interval from BedGraph file 'index' into the queue. + will only be added if it belongs to the current chromosome. + + If the interval was added (=consumed), the next interval will be read from the file + using 'LoadNextItem' + */ + void AddInterval(int index); + + /* + Loads the next interval from Bed file 'index'. + Stores it in 'current_bed_item' vector. + */ + void LoadNextItem(int index); + + /* + Scans the 'current_bedgraph_item' vector, + find the 'first' chromosome to use (different BedGraph files can start with different chromosomes). + */ + std::string DetermineNextChrom(); + + /* + Returns 'true' if ALL intervals from ALL BedGraph files were used + */ + bool AllFilesDone(); + + /* + Extract the next coordinate from the queue, and updates the current coverage information. + If multiple interval share the same coordinate values, all of them are handled. + If an END coordinate is consumed, the next interval (from the corresponding file) is read. + */ + CHRPOS ConsumeNextCoordinate(); + + /* + Updates the coverage information based on the given item. + Item can be a START coordinate or an END coordiante. + */ + void UpdateInformation(const IntervalItem &item); + + /* + prints chrom/start/end and the current depth coverage values of all the files. + */ + void PrintCoverage(CHRPOS start, CHRPOS end); + + /* + prints chrom/start/end and the ZERO depth coverage values of all the files. + */ + void PrintEmptyCoverage(CHRPOS start, CHRPOS end); + + void DebugPrintQueue(); +}; + + +#endif