| 
0
 | 
     1 /*****************************************************************************
 | 
| 
 | 
     2   pairToBed.h
 | 
| 
 | 
     3 
 | 
| 
 | 
     4   (c) 2009 - Aaron Quinlan
 | 
| 
 | 
     5   Hall Laboratory
 | 
| 
 | 
     6   Department of Biochemistry and Molecular Genetics
 | 
| 
 | 
     7   University of Virginia
 | 
| 
 | 
     8   aaronquinlan@gmail.com
 | 
| 
 | 
     9 
 | 
| 
 | 
    10   Licenced under the GNU General Public License 2.0 license.
 | 
| 
 | 
    11 ******************************************************************************/
 | 
| 
 | 
    12 #ifndef INTERSECTBED_H
 | 
| 
 | 
    13 #define INTERSECTBED_H
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 #include "api/BamReader.h"
 | 
| 
 | 
    16 #include "api/BamWriter.h"
 | 
| 
 | 
    17 #include "api/BamAux.h"
 | 
| 
 | 
    18 using namespace BamTools;
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 #include "bedFile.h"
 | 
| 
 | 
    21 #include "bedFilePE.h"
 | 
| 
 | 
    22 #include <vector>
 | 
| 
 | 
    23 #include <iostream>
 | 
| 
 | 
    24 #include <fstream>
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 using namespace std;
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 
 | 
| 
 | 
    30 /**************************************************
 | 
| 
 | 
    31 Helper function protoypes
 | 
| 
 | 
    32 **************************************************/
 | 
| 
 | 
    33 void IsCorrectMappingForBEDPE (const BamAlignment &bam, const RefVector &refs, BEDPE &a);
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 //************************************************
 | 
| 
 | 
    38 // Class methods and elements
 | 
| 
 | 
    39 //************************************************
 | 
| 
 | 
    40 class BedIntersectPE {
 | 
| 
 | 
    41 
 | 
| 
 | 
    42 public:
 | 
| 
 | 
    43 
 | 
| 
 | 
    44     // constructor
 | 
| 
 | 
    45     BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
 | 
| 
 | 
    46         string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance);
 | 
| 
 | 
    47     // destructor
 | 
| 
 | 
    48     ~BedIntersectPE(void);
 | 
| 
 | 
    49 
 | 
| 
 | 
    50     void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type);
 | 
| 
 | 
    51 
 | 
| 
 | 
    52     bool FindOneOrMoreOverlaps(const BEDPE &, const string &type);
 | 
| 
 | 
    53 
 | 
| 
 | 
    54     void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type);
 | 
| 
 | 
    55     bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type);
 | 
| 
 | 
    56 
 | 
| 
 | 
    57     void IntersectBedPE();
 | 
| 
 | 
    58     void IntersectBamPE(string bamFile);
 | 
| 
 | 
    59 
 | 
| 
 | 
    60     void DetermineBedPEInput();
 | 
| 
 | 
    61 
 | 
| 
 | 
    62 private:
 | 
| 
 | 
    63 
 | 
| 
 | 
    64     string _bedAFilePE;
 | 
| 
 | 
    65     string _bedBFile;
 | 
| 
 | 
    66     float _overlapFraction;
 | 
| 
 | 
    67     string _searchType;
 | 
| 
 | 
    68     bool _sameStrand;
 | 
| 
 | 
    69     bool _diffStrand;
 | 
| 
 | 
    70     bool _useEditDistance;
 | 
| 
 | 
    71     bool _bamInput;
 | 
| 
 | 
    72     bool _bamOutput;
 | 
| 
 | 
    73     bool  _isUncompressedBam;
 | 
| 
 | 
    74 
 | 
| 
 | 
    75     // instance of a paired-end bed file class.
 | 
| 
 | 
    76     BedFilePE *_bedA;
 | 
| 
 | 
    77 
 | 
| 
 | 
    78     // instance of a bed file class.
 | 
| 
 | 
    79     BedFile *_bedB;
 | 
| 
 | 
    80 
 | 
| 
 | 
    81     inline
 | 
| 
 | 
    82     void ConvertBamToBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BEDPE &a) {
 | 
| 
 | 
    83 
 | 
| 
 | 
    84         // initialize BEDPE variables
 | 
| 
 | 
    85         a.start1 = a.start2 = a.end1 = a.end2 = -1;
 | 
| 
 | 
    86         a.chrom1 = a.chrom2 = ".";
 | 
| 
 | 
    87         a.strand1 = a.strand2 = '.';
 | 
| 
 | 
    88         uint32_t editDistance1, editDistance2;
 | 
| 
 | 
    89         editDistance1 = editDistance2 = 0;
 | 
| 
 | 
    90 
 | 
| 
 | 
    91         // take the qname from end 1.
 | 
| 
 | 
    92         a.name = bam1.Name;
 | 
| 
 | 
    93 
 | 
| 
 | 
    94         // end 1
 | 
| 
 | 
    95         if (bam1.IsMapped()) {
 | 
| 
 | 
    96             a.chrom1  = refs.at(bam1.RefID).RefName;
 | 
| 
 | 
    97             a.start1  = bam1.Position;
 | 
| 
 | 
    98             a.end1    = bam1.GetEndPosition(false, false);
 | 
| 
 | 
    99             a.strand1 = "+";
 | 
| 
 | 
   100             if (bam1.IsReverseStrand()) a.strand1 = "-";
 | 
| 
 | 
   101 
 | 
| 
 | 
   102             // extract the edit distance from the NM tag
 | 
| 
 | 
   103             // if possible. otherwise, complain.
 | 
| 
 | 
   104             if (_useEditDistance == true) {
 | 
| 
 | 
   105                 if (bam1.GetTag("NM", editDistance1) == false) {
 | 
| 
 | 
   106                     cerr << "The edit distance tag (NM) was not found in the BAM file.  Please disable -ed.  Exiting\n";
 | 
| 
 | 
   107                     exit(1);
 | 
| 
 | 
   108                 }
 | 
| 
 | 
   109             }
 | 
| 
 | 
   110         }
 | 
| 
 | 
   111 
 | 
| 
 | 
   112         // end 2
 | 
| 
 | 
   113         if (bam2.IsMapped()) {
 | 
| 
 | 
   114             a.chrom2  = refs.at(bam2.RefID).RefName;
 | 
| 
 | 
   115             a.start2  = bam2.Position;
 | 
| 
 | 
   116             a.end2    = bam2.GetEndPosition(false, false);
 | 
| 
 | 
   117             a.strand2 = "+";
 | 
| 
 | 
   118             if (bam2.IsReverseStrand()) a.strand2 = "-";
 | 
| 
 | 
   119 
 | 
| 
 | 
   120             // extract the edit distance from the NM tag
 | 
| 
 | 
   121             // if possible. otherwise, complain.
 | 
| 
 | 
   122             if (_useEditDistance == true) {
 | 
| 
 | 
   123                 if (bam2.GetTag("NM", editDistance2) == false) {
 | 
| 
 | 
   124                     cerr << "The edit distance tag (NM) was not found in the BAM file.  Please disable -ed.  Exiting\n";
 | 
| 
 | 
   125                     exit(1);
 | 
| 
 | 
   126                 }
 | 
| 
 | 
   127             }
 | 
| 
 | 
   128         }
 | 
| 
 | 
   129 
 | 
| 
 | 
   130         // swap the ends if necessary
 | 
| 
 | 
   131         if ( a.chrom1 > a.chrom2 || ((a.chrom1 == a.chrom2) && (a.start1 > a.start2)) ) {
 | 
| 
 | 
   132             swap(a.chrom1, a.chrom2);
 | 
| 
 | 
   133             swap(a.start1, a.start2);
 | 
| 
 | 
   134             swap(a.end1, a.end2);
 | 
| 
 | 
   135             swap(a.strand1, a.strand2);
 | 
| 
 | 
   136         }
 | 
| 
 | 
   137 
 | 
| 
 | 
   138         // compute the minimum mapping quality b/w the two ends of the pair.
 | 
| 
 | 
   139         a.score = "0";
 | 
| 
 | 
   140         if (_useEditDistance == false) {
 | 
| 
 | 
   141             if (bam1.IsMapped() == true && bam2.IsMapped() == true)
 | 
| 
 | 
   142                 a.score = ToString(min(bam1.MapQuality, bam2.MapQuality));
 | 
| 
 | 
   143         }
 | 
| 
 | 
   144         // BEDPE using edit distance
 | 
| 
 | 
   145         else {
 | 
| 
 | 
   146             if (bam1.IsMapped() == true && bam2.IsMapped() == true)
 | 
| 
 | 
   147                 a.score = ToString((int) (editDistance1 + editDistance2));
 | 
| 
 | 
   148             else if (bam1.IsMapped() == true)
 | 
| 
 | 
   149                 a.score = ToString((int) editDistance1);
 | 
| 
 | 
   150             else if (bam2.IsMapped() == true)
 | 
| 
 | 
   151                 a.score = ToString((int) editDistance2);
 | 
| 
 | 
   152         }
 | 
| 
 | 
   153     };
 | 
| 
 | 
   154 
 | 
| 
 | 
   155     inline
 | 
| 
 | 
   156     void ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
 | 
| 
 | 
   157                                           const RefVector &refs,
 | 
| 
 | 
   158                                           BamWriter &writer);
 | 
| 
 | 
   159 };
 | 
| 
 | 
   160 
 | 
| 
 | 
   161 #endif /* PEINTERSECTBED_H */
 |