diff BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/mergeBed/mergeBed.cpp	Thu Nov 03 10:25:04 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,364 +0,0 @@
-/*****************************************************************************
-  mergeBed.cpp
-
-  (c) 2009 - Aaron Quinlan
-  Hall Laboratory
-  Department of Biochemistry and Molecular Genetics
-  University of Virginia
-  aaronquinlan@gmail.com
-
-  Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "lineFileUtilities.h"
-#include "mergeBed.h"
-
-
-
-void BedMerge::ReportMergedNames(const vector<string> &names) {
-    if (names.size() > 0) {
-        printf("\t");
-        vector<string>::const_iterator nameItr = names.begin();
-        vector<string>::const_iterator nameEnd = names.end();
-        for (; nameItr != nameEnd; ++nameItr) {
-            if (nameItr < (nameEnd - 1))
-                cout << *nameItr << ";";
-            else
-                cout << *nameItr;
-        }
-    }
-    else {
-        cerr << endl 
-             << "*****" << endl 
-             << "*****ERROR: No names found to report for the -names option. Exiting." << endl 
-             << "*****" << endl;
-        exit(1);
-    }
-}
-
-
-void BedMerge::ReportMergedScores(const vector<string> &scores) {
-    if (scores.size() > 0) {
-        printf("\t");
-
-        // convert the scores to floats
-        vector<float> data;
-        for (size_t i = 0 ; i < scores.size() ; i++) {
-            data.push_back(atof(scores[i].c_str()));
-        }    
-
-        if (_scoreOp == "sum") {
-            printf("%.3f", accumulate(data.begin(), data.end(), 0.0));
-        }
-        else if (_scoreOp == "min") {
-            printf("%.3f", *min_element( data.begin(), data.end() ));
-        }
-        else if (_scoreOp == "max") {
-            printf("%.3f", *max_element( data.begin(), data.end() ));
-        }
-        else if (_scoreOp == "mean") {
-            double total = accumulate(data.begin(), data.end(), 0.0);
-            double mean = total / data.size();
-            printf("%.3f", mean);
-        }
-        else if (_scoreOp == "median") {
-            double median = 0.0;
-            sort(data.begin(), data.end());
-            int totalLines = data.size();
-            if ((totalLines % 2) > 0) {
-                long mid;
-                mid = totalLines / 2;
-                median = data[mid];
-            }
-            else {
-                long midLow, midHigh;
-                midLow = (totalLines / 2) - 1;
-                midHigh = (totalLines / 2);
-                median = (data[midLow] + data[midHigh]) / 2.0;
-            }
-            printf("%.3f", median);
-        }
-        else if ((_scoreOp == "mode") || (_scoreOp == "antimode")) {
-             // compute the frequency of each unique value
-             map<string, int> freqs;
-             vector<string>::const_iterator dIt  = scores.begin();
-             vector<string>::const_iterator dEnd = scores.end();
-             for (; dIt != dEnd; ++dIt) {
-                 freqs[*dIt]++;
-             }
-
-             // grab the mode and the anti mode
-             string mode, antiMode;
-             int    count = 0;
-             int minCount = INT_MAX;
-             for(map<string,int>::const_iterator iter = freqs.begin(); iter != freqs.end(); ++iter) {
-                 if (iter->second > count) {
-                     mode = iter->first;
-                     count = iter->second;
-                 }
-                 if (iter->second < minCount) {
-                     antiMode = iter->first;
-                     minCount = iter->second;
-                 }
-             }
-             // report
-             if (_scoreOp == "mode") {
-                 printf("%s", mode.c_str());
-             }
-             else if (_scoreOp == "antimode") {
-                 printf("%s", antiMode.c_str());
-             }
-         }
-         else if (_scoreOp == "collapse") {    
-            vector<string>::const_iterator scoreItr = scores.begin();
-            vector<string>::const_iterator scoreEnd = scores.end();
-            for (; scoreItr != scoreEnd; ++scoreItr) {
-                if (scoreItr < (scoreEnd - 1))
-                    cout << *scoreItr << ";";
-                else
-                    cout << *scoreItr;
-            }
-        }
-    }
-    else {        
-        cerr << endl 
-             << "*****" << endl 
-             << "*****ERROR: No scores found to report for the -scores option. Exiting." << endl 
-             << "*****" << endl;
-        exit(1);
-    }
-}
-
-// ===============
-// = Constructor =
-// ===============
-BedMerge::BedMerge(string &bedFile, 
-                   bool numEntries, 
-                   int  maxDistance, 
-                   bool forceStrand, 
-                   bool reportNames, 
-                   bool reportScores,
-                   const string &scoreOp) :
-    _bedFile(bedFile),
-    _numEntries(numEntries),
-    _forceStrand(forceStrand),
-    _reportNames(reportNames),
-    _reportScores(reportScores),
-    _scoreOp(scoreOp),
-    _maxDistance(maxDistance)
-{
-    _bed = new BedFile(bedFile);
-
-    if (_forceStrand == false)
-        MergeBed();
-    else
-        MergeBedStranded();
-}
-
-
-// =================
-// =   Destructor  =
-// =================
-BedMerge::~BedMerge(void) {
-}
-
-
-// ===============================================
-// Convenience method for reporting merged blocks
-// ================================================
-void BedMerge::Report(string chrom, int start, int end, 
-                      const vector<string> &names, const vector<string> &scores, int mergeCount) 
-{
-    // ARQ: removed to force all output to be zero-based, BED format, reagrdless of input type
-    //if (_bed->isZeroBased == false) {start++;}
-    
-    printf("%s\t%d\t%d", chrom.c_str(), start, end);
-    // just the merged intervals
-    if (_numEntries == false && _reportNames == false && _reportScores == false) {
-        printf("\n");
-    }
-    // merged intervals and counts    
-    else if (_numEntries == true && _reportNames == false && _reportScores == false) {
-        printf("\t%d\n", mergeCount);
-    }
-    // merged intervals and names        
-    else if (_numEntries == false && _reportNames == true && _reportScores == false) {
-        ReportMergedNames(names);
-        printf("\n");
-    }
-    // merged intervals and scores        
-    else if (_numEntries == false && _reportNames == false && _reportScores == true) {
-        ReportMergedScores(scores);
-        printf("\n");
-    }
-    // merged intervals, names, and scores        
-    else if (_numEntries == false && _reportNames == true && _reportScores == true) {
-        ReportMergedNames(names);
-        ReportMergedScores(scores);
-        printf("\n");
-    }
-}
-
-
-// =========================================================
-// Convenience method for reporting merged blocks by strand
-// =========================================================
-void BedMerge::ReportStranded(string chrom, int start, int end, 
-                              const vector<string> &names, const vector<string> &scores,
-                              int mergeCount, string strand) 
-{
-    // ARQ: removed to force all output to be zero-based, BED format, reagrdless of input type
-    //if (_bed->isZeroBased == false) {start++;}
-    
-    printf("%s\t%d\t%d", chrom.c_str(), start, end);
-    // just the merged intervals
-    if (_numEntries == false && _reportNames == false && _reportScores == false) {
-        printf("\t%s\n", strand.c_str());
-    }
-    // merged intervals and counts    
-    else if (_numEntries == true && _reportNames == false && _reportScores == false) {
-        printf("\t%d\t%s\n", mergeCount, strand.c_str());
-    }
-    // merged intervals and names        
-    else if (_numEntries == false && _reportNames == true && _reportScores == false) {
-        ReportMergedNames(names);
-        printf("\t%s\n", strand.c_str());
-    }
-    // merged intervals and scores        
-    else if (_numEntries == false && _reportNames == false && _reportScores == true) {
-        ReportMergedScores(scores);
-        printf("\t%s\n", strand.c_str());
-    }
-    // merged intervals, names, and scores        
-    else if (_numEntries == false && _reportNames == true && _reportScores == true) {
-        ReportMergedNames(names);
-        ReportMergedScores(scores);
-        printf("\t%s\n", strand.c_str());
-    }
-}
-
-
-// =====================================================
-// = Merge overlapping BED entries into a single entry =
-// =====================================================
-void BedMerge::MergeBed() {
-
-    // load the "B" bed file into a map so
-    // that we can easily compare "A" to it for overlaps
-    _bed->loadBedFileIntoMapNoBin();
-
-    // loop through each chromosome and merge their BED entries
-    masterBedMapNoBin::const_iterator m    = _bed->bedMapNoBin.begin();
-    masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();
-    for (; m != mEnd; ++m) {
-
-        // bedList is already sorted by start position.
-        string chrom        = m->first;
-        vector<BED> bedList = m->second;
-        int mergeCount = 1;
-        vector<string> names;
-        vector<string> scores;
-        
-        // merge overlapping features for this chromosome.
-        int start = -1;
-        int end   = -1;
-        vector<BED>::const_iterator bedItr = bedList.begin();
-        vector<BED>::const_iterator bedEnd = bedList.end();
-        for (; bedItr != bedEnd; ++bedItr) {
-            // new block, no overlap
-            if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {
-                if (start >= 0) {
-                    Report(chrom, start, end, names, scores, mergeCount);
-                    // reset
-                    mergeCount = 1;
-                    names.clear();
-                    scores.clear();
-                }
-                start = bedItr->start;
-                end   = bedItr->end;
-                if (!bedItr->name.empty())  names.push_back(bedItr->name);
-                if (!bedItr->score.empty()) scores.push_back(bedItr->score);
-            }
-            // same block, overlaps
-            else {
-                if ((int) bedItr-> end > end) end = bedItr->end;
-                mergeCount++;
-                if (!bedItr->name.empty())  names.push_back(bedItr->name);
-                if (!bedItr->score.empty()) scores.push_back(bedItr->score);
-            }
-        }
-        if (start >= 0) {
-            Report(chrom, start, end, names, scores, mergeCount);
-        }
-    }
-}
-
-
-// ==================================================================================
-// = Merge overlapping BED entries into a single entry, accounting for strandedness =
-// ==================================================================================
-void BedMerge::MergeBedStranded() {
-
-    // load the "B" bed file into a map so
-    // that we can easily compare "A" to it for overlaps
-    _bed->loadBedFileIntoMapNoBin();
-
-    // loop through each chromosome and merge their BED entries
-    masterBedMapNoBin::const_iterator m    = _bed->bedMapNoBin.begin();
-    masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end();
-    for (; m != mEnd; ++m) {
-        
-        // bedList is already sorted by start position.
-        string chrom        = m->first;
-        vector<BED> bedList = m->second;
-
-        // make a list of the two strands to merge separately.
-        vector<string> strands(2);
-        strands[0] = "+";
-        strands[1] = "-";
-
-        // do two passes, one for each strand.
-        for (unsigned int s = 0; s < strands.size(); s++) {
-
-            int mergeCount = 1;
-            int numOnStrand = 0;
-            vector<string> names;
-            vector<string> scores;
-
-            // merge overlapping features for this chromosome.
-            int start = -1;
-            int end   = -1;
-            vector<BED>::const_iterator bedItr = bedList.begin();
-            vector<BED>::const_iterator bedEnd = bedList.end();
-            for (; bedItr != bedEnd; ++bedItr) {
-
-                // if forcing strandedness, move on if the hit
-                // is not on the current strand.
-                if (bedItr->strand != strands[s]) { continue; }
-                else { numOnStrand++; }
-                
-                if ( (((int) bedItr->start - end) > _maxDistance) || (end < 0)) {
-                    if (start >= 0) {
-                        ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);
-                        // reset
-                        mergeCount = 1;
-                        names.clear();
-                        scores.clear();
-                    }
-                    start = bedItr->start;
-                    end   = bedItr->end;
-                    if (!bedItr->name.empty())  names.push_back(bedItr->name);
-                    if (!bedItr->score.empty()) scores.push_back(bedItr->score);
-                }
-                else {
-                    if ((int) bedItr-> end > end) end = bedItr->end;
-                    mergeCount++;
-                    if (!bedItr->name.empty())  names.push_back(bedItr->name);
-                    if (!bedItr->score.empty()) scores.push_back(bedItr->score);
-                }
-            }
-            if (start >= 0) {
-                ReportStranded(chrom, start, end, names, scores, mergeCount, strands[s]);
-            }
-        }
-    }
-}