Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,255 +0,0 @@ -/***************************************************************************** - unionBedGraphs.cpp - - (c) 2010 - Assaf Gordon, CSHL - - Aaron Quinlan, UVA - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include <cassert> -#include <cstring> -#include <cstdlib> -#include <iostream> -#include <algorithm> - -#include "bedGraphFile.h" -#include "unionBedGraphs.h" - -using namespace std; - - -UnionBedGraphs::UnionBedGraphs(std::ostream& _output, - const vector<string>& _filenames, - const vector<string>& _titles, - bool _print_empty_regions, - const std::string& _genome_size_filename, - const std::string& _no_coverage_value ) : - filenames(_filenames), - titles(_titles), - output(_output), - current_non_zero_inputs(0), - print_empty_regions(_print_empty_regions), - genome_sizes(NULL), - no_coverage_value(_no_coverage_value) -{ - if (print_empty_regions) { - assert(!_genome_size_filename.empty()); - - genome_sizes = new GenomeFile(_genome_size_filename); - } -} - - -UnionBedGraphs::~UnionBedGraphs() { - CloseBedgraphFiles(); - if (genome_sizes) { - delete genome_sizes; - genome_sizes = NULL ; - } -} - - -void UnionBedGraphs::Union() { - OpenBedgraphFiles(); - - // Add the first interval from each file - for(size_t i=0;i<bedgraph_files.size();++i) - LoadNextBedgraphItem(i); - - // Chromosome loop - once per chromosome - do { - // Find the first chromosome to use - current_chrom = DetermineNextChrom(); - - // Populate the queue with initial values from all files - // (if they belong to the correct chromosome) - for(size_t i=0;i<bedgraph_files.size();++i) - AddInterval(i); - - CHRPOS current_start = ConsumeNextCoordinate(); - - // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage - if (print_empty_regions && current_start > 0) - PrintEmptyCoverage(0,current_start); - - // Intervals loop - until all intervals (of current chromosome) from all files are used. - do { - CHRPOS current_end = queue.top().coord; - PrintCoverage(current_start, current_end); - current_start = ConsumeNextCoordinate(); - } while (!queue.empty()); - - // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome - // print a dummy empty coverage - if (print_empty_regions) { - CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom); - if (current_start < chrom_size) - PrintEmptyCoverage(current_start, chrom_size); - } - - } while (!AllFilesDone()); -} - - -CHRPOS UnionBedGraphs::ConsumeNextCoordinate() { - assert(!queue.empty()); - - CHRPOS new_position = queue.top().coord; - do { - IntervalItem item = queue.top(); - UpdateInformation(item); - queue.pop(); - } while (!queue.empty() && queue.top().coord == new_position); - - return new_position; -} - - -void UnionBedGraphs::UpdateInformation(const IntervalItem &item) { - // Update the depth coverage for this file - - // Which coordinate is it - start or end? - switch (item.coord_type) - { - case START: - current_depth[item.source_index] = item.depth; - current_non_zero_inputs++; - break; - case END: - //Read the next interval from this file - AddInterval(item.source_index); - current_depth[item.source_index] = no_coverage_value; - current_non_zero_inputs--; - break; - default: - assert(0); - } -} - - -void UnionBedGraphs::PrintHeader() { - output << "chrom\tstart\tend" ; - for (size_t i=0;i<titles.size();++i) - output << "\t" <<titles[i]; - output << endl; -} - - -void UnionBedGraphs::PrintCoverage(CHRPOS start, CHRPOS end) { - if ( current_non_zero_inputs == 0 && ! print_empty_regions ) - return ; - - output << current_chrom << "\t" - << start << "\t" - << end; - - for (size_t i=0;i<current_depth.size();++i) - output << "\t" << current_depth[i] ; - - output << endl; -} - - -void UnionBedGraphs::PrintEmptyCoverage(CHRPOS start, CHRPOS end) { - output << current_chrom << "\t" - << start << "\t" - << end; - - for (size_t i=0;i<current_depth.size();++i) - output << "\t" << no_coverage_value ; - - output << endl; -} - - -void UnionBedGraphs::LoadNextBedgraphItem(int index) { - assert(static_cast<unsigned int>(index) < bedgraph_files.size()); - - current_bedgraph_item[index].chrom=""; - - BedGraphFile *file = bedgraph_files[index]; - BEDGRAPH_STR bg; - int lineNum = 0; - BedGraphLineStatus status; - - while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID ) { - if (status != BEDGRAPH_VALID) - continue; - - current_bedgraph_item[index] = bg ; - break; - } -} - - -bool UnionBedGraphs::AllFilesDone() { - for (size_t i=0;i<current_bedgraph_item.size();++i) - if (!current_bedgraph_item[i].chrom.empty()) - return false; - return true; -} - - -string UnionBedGraphs::DetermineNextChrom() { - string next_chrom; - for (size_t i=0;i<current_bedgraph_item.size();++i) { - if (current_bedgraph_item[i].chrom.empty()) - continue; - - if (next_chrom.empty()) - next_chrom = current_bedgraph_item[i].chrom; - else - if (current_bedgraph_item[i].chrom < next_chrom) - next_chrom = current_bedgraph_item[i].chrom ; - } - return next_chrom; -} - - -void UnionBedGraphs::AddInterval(int index) { - assert(static_cast<unsigned int>(index) < bedgraph_files.size()); - - //This file has no more intervals - if (current_bedgraph_item[index].chrom.empty()) - return ; - - //If the next interval belongs to a different chrom, don't add it - if (current_bedgraph_item[index].chrom!=current_chrom) - return ; - - const BEDGRAPH_STR &bg(current_bedgraph_item[index]); - - IntervalItem start_item(index, START, bg.start, bg.depth); - IntervalItem end_item(index, END, bg.end, bg.depth); - - queue.push(start_item); - queue.push(end_item); - - LoadNextBedgraphItem(index); -} - - -void UnionBedGraphs::OpenBedgraphFiles() { - for (size_t i=0;i<filenames.size();++i) { - BedGraphFile *file = new BedGraphFile(filenames[i]); - file->Open(); - bedgraph_files.push_back(file); - - current_depth.push_back(no_coverage_value); - } - current_bedgraph_item.resize(filenames.size()); -} - - -void UnionBedGraphs::CloseBedgraphFiles() { - for (size_t i=0;i<bedgraph_files.size();++i) { - BedGraphFile *file = bedgraph_files[i]; - delete file; - bedgraph_files[i] = NULL ; - } - bedgraph_files.clear(); -}